# Source code for violin.network

"""
network.py

Handles the functions necessary for finding paths within the model for VIOLIN
Created November 2019 - Casey Hansen MeLoDy Lab
Updated June 2025 - Haomiao Luo
"""
from typing import Union
import pandas as pd
import numpy as np
import networkx as nx
from violin.numeric import get_attributes, compare


# Punctuation left over when a regulator cell holds a stringified list such as "['A', 'B']".
_REGULATOR_PUNCTUATION = str.maketrans('', '', "[]'")


def _split_regulators(cell: str) -> list:
    """Split one regulator cell into a list of clean regulator names.

    Brackets and single quotes are removed, the cell is split on commas and
    every name is stripped. Blank entries (empty cell, trailing comma,
    whitespace-only token) are discarded so they never become graph nodes.
    """
    names = (name.strip() for name in cell.translate(_REGULATOR_PUNCTUATION).split(','))
    return [name for name in names if name]


def node_edge_list(model_df: pd.DataFrame) -> nx.DiGraph:
    """
    This function converts a model from the BioRECIPE format into a node-edge list for use with NetworkX.
    The converted network is a directed graph with one edge per (regulator, element) pair,
    pointing from the regulator to the regulated element.

    Parameters
    ----------
    model_df : pd.DataFrame
        A model dataframe, must be in BioRECIPE format. It must contain the columns
        'Positive Regulator List' and 'Negative Regulator List', and one of
        'Listname', 'Variable' or 'Element Name' to identify the elements.

    Returns
    -------
    node_edge_list : nx.DiGraph
        A directed graph representation of the model. Every edge carries a 'weight'
        attribute: 0 for a positive regulator, 1 for a negative regulator.
        A model without any regulator yields an empty graph.

    Raises
    ------
    KeyError
        If one of the required columns is missing from ``model_df``.
    """

    # If elements are defined by list names or variables, use those. Else, use the common names
    if 'Listname' in model_df.columns and not model_df['Listname'].empty:
        target = 'Listname'
    elif 'Variable' in model_df.columns and not model_df['Variable'].empty:
        target = 'Variable'
    else:
        target = 'Element Name'

    # Subset of the model, just element and regulator columns, as text.
    # Missing values become the 'nan' placeholder under astype(str); blank them out.
    columns = [target, 'Positive Regulator List', 'Negative Regulator List']
    table = model_df[columns].astype(str).replace('nan', '')

    # Visit elements in name order so node/edge insertion order does not depend
    # on the row order of the model.
    rows = sorted(table.itertuples(index=False, name=None), key=lambda row: row[0])

    node_edge_list_ = nx.DiGraph()
    for element, positives, negatives in rows:
        element = element.strip()
        if not element:
            # Rows without an element name cannot be the target of an edge
            continue
        # The weight encodes the "sign" of the regulation: 0 = positive, 1 = negative.
        # Negative regulators are added last, so if a regulator is listed with both
        # signs for the same element the negative edge is the one kept.
        for weight, cell in ((0, positives), (1, negatives)):
            for regulator in _split_regulators(cell):
                node_edge_list_.add_edge(regulator, element, weight=weight)
    return node_edge_list_


def _identifier_column(model_df: pd.DataFrame) -> str:
    """Return the model column holding element identifiers (same rule as node_edge_list())."""
    if 'Listname' in model_df.columns and not model_df['Listname'].empty:
        return 'Listname'
    if 'Variable' in model_df.columns and not model_df['Variable'].empty:
        return 'Variable'
    return 'Element Name'


def _flagged_kind(scheme: str, kind_values: dict, contradiction: str) -> Union[str, int]:
    """Return the Kind Score of a flagged path: 'path mismatch' for schemes 1/3,
    the stringified specific contradiction for scheme 2."""
    if scheme in ('1', '3'):
        return kind_values['path mismatch']
    if scheme == '2':
        return str(kind_values[contradiction])
    raise ValueError('Enter a right scheme (1, 2, 3).')


def path_finding(regulator: str,
                 regulated: str,
                 sign: str,
                 model_df: pd.DataFrame,
                 graph: nx.DiGraph,
                 kind_values: dict,
                 reading_cxn_type: str,
                 reading_atts: dict,
                 attributes: list,
                 scheme='1') -> Union[str, int]:
    """
    This function searches for a path in the model, where the source and target are the identifiers of the
    matched elements, and calculates the kind score based on the result. The path is the shortest path
    with respect to the edge weights (0 = positive edge, 1 = negative edge). It is identified as a negative
    regulation between source and target if the sum of its edge weights is an odd number, and as a positive
    regulation otherwise.

    Parameters
    ----------
    regulator : str
        An identifier of the source node.
    regulated : str
        An identifier of the target node.
    sign : str
        A sign of interaction from the Interaction Set (IS). 'negative' (any letter case) is treated as a
        negative regulation, every other value as a positive regulation.
    model_df : pd.DataFrame
        Model dataframe
    graph : nx.DiGraph
        Model edgelist to create network for finding paths between elements.
    kind_values : dict
        Dictionary containing the numerical values for the Kind Score classifications. The keys read here
        are 'internal extension', 'path mismatch', 'path corroboration', 'att contradiction',
        'sign contradiction' and 'dir contradiction'.
    reading_cxn_type : str
        Connection Type of interaction from reading - 'i' for indirect, 'd' for direct.
    reading_atts: dict
        attributes from interaction, where keys are attributes names and values are attributes values.
    attributes: list
        attributes list for reading file.
    scheme: str
        The scheme of classification, i.e. '1', '2', or '3'.

    Returns
    -------
    kind : int or str
        Kind Score value for the interaction. Under scheme '2' the attribute, sign and direction
        contradictions are returned as ``str(kind_values[...])``; every other result is returned
        exactly as stored in ``kind_values``.

    Raises
    ------
    ValueError
        If a path requires a scheme-dependent classification and ``scheme`` is not '1', '2' or '3',
        or if an end of an indirect path is not listed in the identifier column of ``model_df``.
    """

    # Accept the scheme as a number as well as a string
    scheme = str(scheme)

    # Sign of regulator; assigned same numbering as node_edge_list() function
    # negativeRegulators = 1; positiveRegulators = 0
    sign = 1 if str(sign).strip().lower() == 'negative' else 0

    # Have to make sure regulator and regulated are in the directed graph representation of the model
    # Some nodes may be in the model, but aren't regulated/regulators anywhere
    if regulator not in graph or regulated not in graph:
        return kind_values['internal extension']

    # A path from a node to itself consists of that single node and is not a regulation path
    distinct = regulator != regulated
    forward = distinct and nx.has_path(graph, regulator, regulated)

    # If there is a path of the same direction and LEE = D: internal extension
    if forward and reading_cxn_type == "d":
        if scheme in ('1', '3'):
            return kind_values['internal extension']
        if scheme == '2':
            return kind_values['path mismatch']
        raise ValueError('Enter a right scheme (1, 2, 3).')

    # If there is a path of the same direction and LEE = I: check sign and attributes
    if forward and reading_cxn_type == "i":
        # Finding atts of beginning and end of path
        identifiers = list(model_df[_identifier_column(model_df)])
        s_idx = identifiers.index(regulator)
        t_idx = identifiers.index(regulated)
        # The reading sign is forwarded unchanged; with path=True the helper is presumably
        # not comparing sign-dependent attributes (sign is ambiguous in a path) - TODO confirm
        model_atts = get_attributes(s_idx, t_idx, sign, model_df, attributes, path=True)
        compare_atts = compare(model_atts, reading_atts)

        # Finding path sign: sum the edge weights along the path to determine the overall effect
        path = nx.shortest_path(graph, source=regulator, target=regulated, weight='weight')
        path_wgt = sum(graph[src][dst]['weight'] for src, dst in zip(path, path[1:]))
        # if %2 = 0, then positive regulation, if %2 = 1, then negative regulation
        same_sign = path_wgt % 2 == sign

        # Weak corroboration - regulation matches reading
        if same_sign and compare_atts in [0, 1, 2]:
            return kind_values['path corroboration']
        # Flagged - Regulation same sign, but contradictory attributes
        if same_sign and compare_atts == 3:
            return _flagged_kind(scheme, kind_values, 'att contradiction')
        # Flagged - Regulation opposite sign as reading
        return _flagged_kind(scheme, kind_values, 'sign contradiction')

    # If there is a path of the opposite direction - Flagged
    if distinct and nx.has_path(graph, regulated, regulator):
        return _flagged_kind(scheme, kind_values, 'dir contradiction')

    # If there is no path
    return kind_values['internal extension']