Source code for violin.network

"""
network.py

Handles the functions necessary for finding paths within the model for VIOLIN
Created November 2019 - Casey Hansen MeLoDy Lab
Updated June 2025 - Haomiao Luo
"""
from typing import Union
import pandas as pd
import numpy as np
import networkx as nx
from violin.numeric import get_attributes, compare


def node_edge_list(model_df: pd.DataFrame) -> nx.DiGraph:
    """
    Convert a BioRECIPE model into a signed, directed NetworkX graph.

    Every regulator listed for an element becomes an edge pointing from the
    regulator to the regulated element. The sign of the regulation is stored
    in the ``weight`` edge attribute: ``0`` for entries of
    'Positive Regulator List' and ``1`` for entries of
    'Negative Regulator List'.

    Parameters
    ----------
    model_df : pd.DataFrame
        A model dataframe in BioRECIPE format. It must contain the columns
        'Positive Regulator List' and 'Negative Regulator List', plus the
        identifier column selected below ('Listname', 'Variable' or
        'Element Name').

    Returns
    -------
    node_edge_list : nx.DiGraph
        A directed graph representation of the model. The graph is empty when
        no element of the model has a regulator.
    """
    positive_col = 'Positive Regulator List'
    negative_col = 'Negative Regulator List'
    # Temporary marker used to glue the two regulator lists into one cell.
    separator = '/////'

    # Pick the identifier column: 'Listname' first, then 'Variable', else the
    # common names. NOTE: ``Series.empty`` only checks for zero rows; a column
    # that exists but holds nothing except NaN is still selected.
    if 'Listname' in model_df.columns and not model_df['Listname'].empty:
        target = 'Listname'
    elif 'Variable' in model_df.columns and not model_df['Variable'].empty:
        target = 'Variable'
    else:
        target = 'Element Name'

    # Subset of the model: just the element and regulator columns, as text.
    graph = model_df[[target, positive_col, negative_col]].astype(str)

    # astype(str) turns missing values into the literal 'nan'; blank them out.
    graph = graph.replace('nan', '')

    # Strip list punctuation ("[", "]" and "'") from the regulator cells in a
    # single pass. ``regex`` is passed explicitly because its default differs
    # between pandas versions.
    for column in (positive_col, negative_col):
        graph[column] = graph[column].str.replace(r"[\[\]']", '', regex=True)

    # Combine both lists into one column: positiveRegulators/////negativeRegulators
    graph['Regulators'] = graph[positive_col] + separator + graph[negative_col]
    graph = graph.drop(columns=[positive_col, negative_col])

    # Split every row on the separator so that each element yields two
    # consecutive rows: first its positive regulators, then its negative ones.
    # NOTE(review): the unstack step presumably requires the identifiers in
    # the target column to be unique - confirm against the model format.
    graph = (graph.set_index([target]).stack().str.split(separator, expand=True).stack().
             unstack(-2).reset_index(-1, drop=True).reset_index())

    # Even rows hold positive regulators (weight 0), odd rows negative ones
    # (weight 1). Kept as floats, which is what row-wise assignment into a
    # new column produced.
    graph['weight'] = (np.arange(graph.shape[0]) % 2).astype(float)

    # Remove rows without a regulator (housekeeping).
    graph = graph.replace(r'^\s*$', np.nan, regex=True).dropna()

    # A model in which nothing is regulated has no edges at all.
    if graph.empty:
        return nx.DiGraph()

    # One row per individual regulator: split the comma-separated lists.
    graph = (graph.set_index([target, 'weight']).stack().str.split(',', expand=True).stack().
             unstack(-2).reset_index(-1, drop=True).reset_index())

    # Remove any lingering whitespace around the node names.
    graph['Regulators'] = graph['Regulators'].str.strip()
    graph[target] = graph[target].str.strip()

    # Build the directed graph: regulator -> regulated element, signed by 'weight'.
    node_edge_list_ = nx.from_pandas_edgelist(graph, 'Regulators', target, 'weight',
                                              create_using=nx.DiGraph())
    return node_edge_list_
def _identifier_column(model_df: pd.DataFrame) -> str:
    """Return the model column holding node identifiers ('Listname', then 'Variable', else 'Element Name')."""
    for column in ('Listname', 'Variable'):
        if column in model_df.columns and not model_df[column].empty:
            return column
    return 'Element Name'


def _flagged_kind(kind_values: dict, scheme: str, contradiction: str) -> Union[str, int]:
    """Return the Kind Score of a flagged interaction for the given classification scheme."""
    if scheme in ('1', '3'):
        return kind_values['path mismatch']
    if scheme == '2':
        # Scheme 2 reports the specific contradiction, as a string.
        return str(kind_values[contradiction])
    raise ValueError('Enter a right scheme (1, 2, 3).')


def path_finding(regulator: str, regulated: str, sign: str, model_df: pd.DataFrame, graph: nx.DiGraph,
                 kind_values: dict, reading_cxn_type: str, reading_atts: dict, attributes: list,
                 scheme='1') -> Union[str, int]:
    """
    Search the model for a path between two matched elements and return the
    Kind Score of the interaction.

    The weighted shortest path (``nx.shortest_path`` with ``weight='weight'``)
    between source and target is used to determine the sign of the path: the
    path is a negative regulation if the sum of its edge weights is odd, and
    a positive regulation if it is even.

    Parameters
    ----------
    regulator : str
        An identifier of the source node.
    regulated : str
        An identifier of the target node.
    sign : str
        Sign of the interaction from the Interaction Set (IS). 'negative'
        (compared case-insensitively) marks a negative regulation; any other
        value is treated as positive.
    model_df : pd.DataFrame
        Model dataframe.
    graph : nx.DiGraph
        Model edgelist used to find paths between elements. Edges carry a
        ``weight`` attribute: 0 for positive, 1 for negative regulation.
    kind_values : dict
        Dictionary containing the numerical values for the Kind Score
        classifications. The keys read here are 'internal extension',
        'path mismatch', 'path corroboration', 'att contradiction',
        'sign contradiction' and 'dir contradiction'.
    reading_cxn_type : str
        Connection Type of the interaction from reading - 'i' for indirect,
        'd' for direct.
    reading_atts : dict
        Attributes of the interaction, where keys are attribute names and
        values are attribute values.
    attributes : list
        Attributes list for the reading file.
    scheme : str
        The scheme of classification, i.e. '1', '2', or '3'.

    Returns
    -------
    kind : int or str
        Kind Score value for the interaction. Under scheme '2' the
        attribute, sign and direction contradictions are returned as strings.

    Raises
    ------
    ValueError
        If a branch that depends on ``scheme`` is reached with a scheme other
        than '1', '2' or '3', or if ``regulator`` / ``regulated`` is a graph
        node that is missing from the identifier column of ``model_df``.
    """
    # Same numbering as node_edge_list(): negative regulators = 1, positive = 0.
    # Compared case-insensitively so both 'Negative' and the documented
    # 'negative' are recognised.
    sign = 1 if isinstance(sign, str) and sign.strip().lower() == 'negative' else 0

    # Both elements have to be nodes of the directed graph; some elements are
    # in the model but are not regulated/regulators anywhere.
    if regulator not in graph or regulated not in graph:
        return kind_values['internal extension']

    # A path from a node to itself has a single node and is never counted.
    distinct = regulator != regulated

    if distinct and nx.has_path(graph, regulator, regulated):
        # Path of the same direction and a direct reading interaction.
        if reading_cxn_type == "d":
            if scheme in ('1', '3'):
                return kind_values['internal extension']
            if scheme == '2':
                return kind_values['path mismatch']
            raise ValueError('Enter a right scheme (1, 2, 3).')

        # Path of the same direction and an indirect reading interaction:
        # check sign and attributes.
        if reading_cxn_type == "i":
            # Row positions of the beginning and end of the path, looked up in
            # the same identifier column node_edge_list() builds the graph from.
            identifiers = list(model_df[_identifier_column(model_df)])
            s_idx = identifiers.index(regulator)
            t_idx = identifiers.index(regulated)

            # The sign of a path is ambiguous, so only the attributes of the
            # two end points are compared.
            model_atts = get_attributes(s_idx, t_idx, sign, model_df, attributes, path=True)
            compare_atts = compare(model_atts, reading_atts)

            # Sum the edge weights along the path to get its overall effect:
            # even -> positive regulation, odd -> negative regulation.
            path = nx.shortest_path(graph, source=regulator, target=regulated, weight='weight')
            path_wgt = sum(graph[src][dst]['weight'] for src, dst in zip(path, path[1:]))
            same_sign = path_wgt % 2 == sign

            # Weak corroboration - regulation matches reading.
            # NOTE(review): 0-2 presumably mean "no attribute contradiction"
            # and 3 "contradiction"; confirm against violin.numeric.compare.
            if same_sign and compare_atts in [0, 1, 2]:
                return kind_values['path corroboration']
            # Flagged - regulation has the same sign, but contradictory attributes.
            if same_sign and compare_atts == 3:
                return _flagged_kind(kind_values, scheme, 'att contradiction')
            # Flagged - regulation has the opposite sign of the reading.
            return _flagged_kind(kind_values, scheme, 'sign contradiction')

    # Path of the opposite direction - flagged.
    if distinct and nx.has_path(graph, regulated, regulator):
        return _flagged_kind(kind_values, scheme, 'dir contradiction')

    # No path between the two elements.
    return kind_values['internal extension']