Source code for violin.scoring

"""
scoring.py

Handles the Match Score, Kind Score, and Epistemic Value functions for VIOLIN
Created November 2019 - Casey Hansen MeLoDy Lab
Updated June 2025 - Haomiao Luo
"""

import pandas as pd
import networkx as nx
from violin.numeric import get_attributes, find_element, compare
from violin.network import path_finding
from typing import List, Union
import logging 

# Import-time side effect: asks the logging package for INFO-level output.
# Per the stdlib, basicConfig() does nothing if the root logger already has handlers.
logging.basicConfig(level=logging.INFO)
# Module-level logger, named after this module; used by _kind_score_to_model_int_id.
logger = logging.getLogger(__name__)


# Default kind score dict - categories
# KIND_DICT = {"strong corroboration" : 2,
#                 "empty attribute" : 1,
#                 "indirect interaction" : 1,
#                 "path corroboration" : 1,
#                 "hanging extension" : 40,
#                 "full extension" : 40,
#                 "internal extension" : 40,
#                 "specification" : 30,
#                 "dir contradiction" : 10,
#                 "sign contradiction" : 10,
#                 "att contradiction" : 10,
#                 "dir mismatch" : 20,
#                 "path mismatch" : 20,
#                 "self-regulation" : 20}

# Kind Score values selected by default for classification schemes '1' and '2'.
KIND_DICT_A = {
    # corroborations
    "strong corroboration": 2,
    "empty attribute": 1,
    "indirect interaction": 3,
    "path corroboration": 5,
    "specification": 7,
    # extensions
    "hanging extension": 40,
    "full extension": 39,
    "internal extension": 38,
    # contradictions
    "dir contradiction": 11,
    "sign contradiction": 10,
    "att contradiction": 9,
    # flagged for review
    "dir mismatch": 20,
    "path mismatch": 19,
    "self-regulation": 18,
}

# Kind Score values selected by default for classification scheme '3':
# the same entries as KIND_DICT_A plus two additional flagged categories.
KIND_DICT_B = {
    **KIND_DICT_A,
    "flagged4": 17,
    "flagged5": 16,
}

# Match Score values, keyed by which reading elements are present in the model.
MATCH_DICT = {
    "source present": 1,
    "target present": 100,
    "both present": 10,
    "neither present": 0.1,
}

# Default attributes list is empty
atts_list = []


def _reading_element_in_model(x, role, reading_df, model_df) -> bool:
    """Return True when the reading's ``role`` element ('Regulated' or 'Regulator') is found in the model."""
    element_type = reading_df.loc[x, role + ' Type']
    # Try name, then HGNC symbol, then database ID; find_element returns -1 when nothing matches
    return (
        find_element("name", reading_df.loc[x, role + ' Name'], element_type, model_df) != -1
        or find_element("hgnc", reading_df.loc[x, role + ' HGNC Symbol'], element_type, model_df) != -1
        or find_element("id", reading_df.loc[x, role + ' ID'], element_type, model_df,
                        reading_df.loc[x, role + ' Database']) != -1
    )


def match_score(x: int, reading_df: pd.DataFrame, model_df: pd.DataFrame,
                match_values: dict=None) -> Union[int, float]:
    """
    This function calculates the Match Score for an interaction from the reading.

    The score only depends on which of the two interaction elements (regulator = source,
    regulated = target) can be found in the model by name, HGNC symbol or database ID.

    Parameters
    ----------
    x : int
        A row index of the dataframe of Interaction set (IS) to be scored
    reading_df : pd.DataFrame
        The reading dataframe
    model_df : pd.DataFrame
        The model dataframe
    match_values : dict
        Dictionary assigning Match Score values, with keys 'source present',
        'target present', 'both present' and 'neither present'.
        Default values found in MATCH_DICT

    Returns
    -------
    match : int or float
        Match Score value taken from ``match_values``
    """
    if match_values is None:
        match_values = MATCH_DICT

    # Search for regulated (target), then regulator (source), from reading in model
    regulated = _reading_element_in_model(x, 'Regulated', reading_df, model_df)
    regulator = _reading_element_in_model(x, 'Regulator', reading_df, model_df)

    # Scoring definition
    if regulated and regulator:
        category = 'both present'
    elif regulator:
        category = 'source present'
    elif regulated:
        category = 'target present'
    else:
        category = 'neither present'
    return match_values[category]
def _kind_score_to_model_int_id(row_idx: int, query_kind: int, kinds_list: List[Union[str, int]], model_t_indices: List[int], model_s_indices: List[int], counter_list: List[str], search_point: int=0, count_path:bool=False) -> str: """Function to record the interaction ID of the model that is classified. The function recursively records the unique IDs Parameters ---------- row_idx: int The row index of the interaction to be classified. query_kind : int The kind score to be classified. kinds_list : list The list of kind scores. model_t_indices : list The model target indices. model_s_indices : list The model source indices. counter_list: list A list to record the interactions that are identified as corroborated or contradicted interaction in model. search_point : int The point to start searching in the list of kind scores. count_path: bool An indicator whether to count the path or not. Default is False, because the path should not be an interaction in model. Returns ------- str The interaction ID of the model that is classified. """ # TODO: Implement path index finding for UI if count_path: raise NotImplementedError("Path finding is not implemented yet.") # Confirm that queried kind is in the list of kinds, starting from the srearch point if query_kind not in kinds_list[search_point:]: logger.info("%s: The queried kind %s is either not in the list of kinds or counted already." 
% (row_idx, query_kind)) # Exit function return # Find the index of the queried kind in the list of kinds current_kind_idx = kinds_list.index(query_kind, search_point) # Find the corresponding source and target indices in the model source_found = model_s_indices[current_kind_idx % len(model_s_indices)] target_found = model_t_indices[current_kind_idx // len(model_s_indices)] # Check if the interaction ID is already in the counter if '%s+%s' % (source_found, target_found) not in counter_list: counter_list.append('%s+%s' % (source_found, target_found)) return # If the interaction ID is already in the counter, check next kind is same as queried kind else: _kind_score_to_model_int_id(row_idx, query_kind, kinds_list, model_t_indices, model_s_indices, counter_list, search_point=current_kind_idx + 1)
# Order in which one Kind Score is chosen when several model interactions were classified.
# Each entry is (category, counter bucket updated when that category is selected, or None).
_KIND_PRIORITY = (
    # Corroborations
    ('strong corroboration', 'corroboration'),
    ('empty attribute', 'corroboration'),
    ('indirect interaction', 'corroboration'),
    ('path corroboration', None),
    ('specification', 'corroboration'),
    # Contradictions
    ('dir contradiction', 'contradiction'),
    ('sign contradiction', 'contradiction'),
    ('att contradiction', 'contradiction'),
    # Extensions
    ('hanging extension', None),
    ('internal extension', None),
    ('full extension', None),
    # Flagged
    ('dir mismatch', None),
    ('path mismatch', None),
    ('self-regulation', None),
    # Only present in the scheme-3 dictionary
    ('flagged4', None),
    ('flagged5', None),
)

# Categories that may legitimately be missing from ``kind_values``
_OPTIONAL_KINDS = ('flagged4', 'flagged5')

# Results of ``compare`` that this module treats as non-contradictory (3 is the contradiction)
_NON_CONTRADICTORY = (0, 1, 2)


def _with_compartment_pairs(attributes):
    """Return a copy of ``attributes`` in which every compartment name/ID attribute has its partner."""
    expanded = [] if attributes is None else list(attributes)
    for role in ('Regulated', 'Regulator'):
        name_att = role + ' Compartment'
        id_att = role + ' Compartment ID'
        if name_att in expanded and id_att not in expanded:
            expanded.insert(expanded.index(name_att) + 1, id_att)
        elif id_att in expanded and name_att not in expanded:
            expanded.insert(expanded.index(id_att), name_att)
    return expanded


def _lookup_model_indices(x, role, reading_df, model_df):
    """Return the (hgnc, name, id) ``find_element`` results for the reading's ``role`` element."""
    element_type = reading_df.loc[x, role + ' Type']
    by_hgnc = find_element("hgnc", reading_df.loc[x, role + ' HGNC Symbol'], element_type, model_df)
    by_name = find_element("name", reading_df.loc[x, role + ' Name'], element_type, model_df)
    by_id = find_element("id", reading_df.loc[x, role + ' ID'], element_type, model_df,
                         reading_df.loc[x, role + ' Database'])
    return by_hgnc, by_name, by_id


def _model_connection_type(model_df, row_idx, sign, position, default):
    """Return the normalised ('i'/'d') connection type of a model interaction, or ``default`` if unavailable."""
    column = sign + ' Connection Type List'
    if column not in model_df.columns:
        return default
    # str() so that a missing (NaN) cell is treated as "not available" instead of raising
    cxn_types = [cxn.lower().strip() for cxn in str(model_df.loc[row_idx, column]).split(',')]
    if all(cxn in ('i', 'd') for cxn in cxn_types):
        return cxn_types[position]
    return default


def _track_contradiction(x, value, kinds, model_t_indices, model_s_indices, counter, classify_scheme):
    """Record the model interaction behind a contradiction kind score in ``counter['contradiction']``."""
    if counter is None:
        return
    query = value
    if classify_scheme == '2':
        # In scheme 2, string entries come from path finding and are not model interactions
        direct_hits = [k for k in kinds if k in [value, str(value)] and not isinstance(k, str)]
        if not direct_hits:
            return
        query = direct_hits[0]
    _kind_score_to_model_int_id(x, query, kinds, model_t_indices, model_s_indices,
                                counter['contradiction'])


def kind_score(x: int,
               model_df: pd.DataFrame,
               reading_df: pd.DataFrame,
               graph: nx.DiGraph,
               counter: dict,
               kind_values: dict=None,
               attributes: list=None,
               classify_scheme: str='1',
               mi_cxn: str='d') -> int:
    """
    This function calculates the Kind Score for an interaction in the Interactions Set (iIS).
    The kind score will be used to represent the subcategories.
    For further details, please find out in: https://www.biorxiv.org/content/10.1101/2024.07.21.604448v1.

    Every (target, source) pair of matching model elements is classified by comparing
    direction, sign, connection type and attributes with the model interaction (MI);
    if no MI exists for a pair, ``path_finding`` is consulted. When several
    classifications are collected, a single one is selected following ``_KIND_PRIORITY``.

    Parameters
    ----------
    x : int
        The row index for an iIS.
    model_df: pd.DataFrame
        The model dataframe
    reading_df : pd.DataFrame
        The reading dataframe.
    graph : nx.DiGraph
        A directed graph of the model, used when function calls path_finding module.
    counter: dict
        A dictionary with the lists 'corroboration' and 'contradiction', used to record
        the model interactions that are identified as corroborated or contradicted.
        Pass None to skip this tracking.
    kind_values : dict
        Dictionary assigning Kind Score values.
        Default values found in KIND_DICT_A (schemes '1', '2') and KIND_DICT_B (scheme '3').
    attributes : list
        A list of attributes compared between the model and the machine reading output.
        Default is None (no attributes). The list is not modified.
    classify_scheme: str
        The scheme of the classification ('1', '2', and '3').
        Default is '1'.
    mi_cxn : str
        What connection type should be assigned to model interactions if not available.
        Accepted values are "d" (direct) or "i" (indirect).
        Default is "d".

    Returns
    -------
    kind : int
        Kind Score, score value.

    Raises
    ------
    ValueError
        If none of the matching model elements yields a classification.
    """
    # Initialize the parameters
    assert (classify_scheme in ['1', '2', '3'])
    assert (mi_cxn in ['d', 'i'])
    if kind_values is None:
        kind_values = KIND_DICT_B if classify_scheme == '3' else KIND_DICT_A

    # Add full location information, if user want to compare location of the element.
    # Works on a copy so the caller's list (possibly a shared default) is left untouched.
    attributes = _with_compartment_pairs(attributes)

    ### Finding interaction in Interactions Set (iIS) attributes ###
    # Finding iIS regulator sign
    if reading_df.loc[x, 'Sign'].lower() in ['activate', 'positive', 'increase']:
        reg_sign, opp_sign = 'Positive', 'Negative'
    else:
        reg_sign, opp_sign = 'Negative', 'Positive'

    # Finding iIS Connection Type (if not in iIS input, default to indirect, 'i')
    if 'Connection Type' in reading_df.columns:
        iis_cxn_type = reading_df.loc[x, 'Connection Type']
    else:
        iis_cxn_type = 'i'

    # Attributes for iIS index 'x' (empty when no attributes are compared)
    reading_atts = {att: reading_df.loc[x, att] for att in attributes}

    # Comparing to model
    source_hits = _lookup_model_indices(x, 'Regulator', reading_df, model_df)
    target_hits = _lookup_model_indices(x, 'Regulated', reading_df, model_df)
    source_in_model = any(hit != -1 for hit in source_hits)
    target_in_model = any(hit != -1 for hit in target_hits)

    if not (source_in_model and target_in_model):
        # Both Extension - Both nodes from reading not in model
        if not source_in_model and not target_in_model:
            return kind_values['full extension']
        # Hanging Extension - One from reading not in model
        return kind_values['hanging extension']

    # Both regulator (source) and regulated (target) node found in the model
    # FIXME: TBD for order
    # Privilege: HGNC > Name > ID
    model_s_indices = next(hit for hit in source_hits if hit != -1)
    model_t_indices = next(hit for hit in target_hits if hit != -1)

    kinds = []
    kind = None
    # Loop over each instance of the target and source in the model
    # (since the same element may exist in multiple states)
    for t_idx in model_t_indices:
        target_listname = model_df.loc[t_idx, 'Listname']
        # Regulators of the target, with the reading's sign and with the opposite sign
        same_sign_regs = model_df.loc[t_idx, reg_sign + ' Regulator List']
        opp_sign_regs = model_df.loc[t_idx, opp_sign + ' Regulator List']

        for s_idx in model_s_indices:
            source_listname = model_df.loc[s_idx, 'Listname']
            # Regulators of the source, used to detect interactions in the reverse direction
            rev_same_regs = model_df.loc[s_idx, reg_sign + ' Regulator List']
            rev_opp_regs = model_df.loc[s_idx, opp_sign + ' Regulator List']

            # MI with match direction, match sign
            if str(same_sign_regs) != "nan" and source_listname in same_sign_regs.split(','):
                # Index of regulator name within regulator list
                position = same_sign_regs.split(',').index(source_listname)
                mi_cxn_type = _model_connection_type(model_df, t_idx, reg_sign, position, mi_cxn)
                # List of model attributes to compare to reading attributes
                model_atts = get_attributes(t_idx, s_idx, reg_sign, model_df, attributes)

                # If iIS = "I" and MI = "I" or iIS = "D" and MI = "D": check attributes
                if (iis_cxn_type == "i" and mi_cxn_type == "i") or \
                        (iis_cxn_type == "d" and mi_cxn_type != "i"):
                    compare_atts = compare(model_atts, reading_atts)
                    # Strong Corroboration - perfect match
                    if compare_atts == 0:
                        kinds.append(kind_values['strong corroboration'])
                    # Weak corroboration - the iIS presents less information than the model interaction
                    elif compare_atts == 1:
                        kinds.append(kind_values['empty attribute'])
                    # Specification - the iIS presents new information
                    elif compare_atts == 2:
                        kinds.append(kind_values['specification'])
                    # Contradiction - the iIS presents information that disputes the model interaction
                    elif compare_atts == 3:
                        kinds.append(kind_values['att contradiction'])
                # If iIS = "D" and MI = "I"
                elif iis_cxn_type == "d" and mi_cxn_type == "i":
                    compare_atts = compare(model_atts, reading_atts)
                    # If attributes are non-contradictory: iIS is a specification
                    if compare_atts in _NON_CONTRADICTORY:
                        kinds.append(kind_values['specification'])
                    # Else: iIS is a contradiction
                    elif compare_atts == 3:
                        kinds.append(kind_values['att contradiction'])
                # If iIS = "I" and MI = "D"
                elif iis_cxn_type == "i" and mi_cxn_type == "d":
                    compare_atts = compare(model_atts, reading_atts)
                    # If attributes are non-contradictory: iIS is a weak corroboration
                    if compare_atts in _NON_CONTRADICTORY:
                        kinds.append(kind_values['indirect interaction'])
                    # Else: iIS is a contradiction
                    elif compare_atts == 3:
                        kinds.append(kind_values['att contradiction'])

            # MI with Matched direction, Mismatched sign
            elif str(opp_sign_regs) != "nan" and source_listname in opp_sign_regs.split(','):
                position = opp_sign_regs.split(',').index(source_listname)
                # The connection type is read from (and checked on) the opposite-sign column
                mi_cxn_type = _model_connection_type(model_df, t_idx, opp_sign, position, mi_cxn)

                # If iIS = "I" and MI = "D"
                if iis_cxn_type == "i" and mi_cxn_type != "i":
                    if classify_scheme in ['1', '2']:
                        kinds.append(kind_values['sign contradiction'])
                    elif classify_scheme == '3':
                        kinds.append(kind_values['flagged5'])
                else:
                    # iIS is a Sign Contradiction, regardless of connection type
                    kinds.append(kind_values['sign contradiction'])

            # MI with Mismatched direction, Matched sign
            elif str(rev_same_regs) != "nan" and target_listname in rev_same_regs.split(','):
                position = rev_same_regs.split(',').index(target_listname)
                mi_cxn_type = _model_connection_type(model_df, s_idx, reg_sign, position, mi_cxn)
                # List of model attributes to compare to reading attributes
                model_atts = get_attributes(s_idx, t_idx, reg_sign, model_df, attributes)

                # iIS = "I" and MI = "I"
                if iis_cxn_type == "i" and mi_cxn_type == "i":
                    kinds.append(kind_values['dir contradiction'])
                # iIS = "D" and MI = "D"
                elif iis_cxn_type == "d" and mi_cxn_type != "i":
                    compare_atts = compare(model_atts, reading_atts)
                    if classify_scheme in ['1', '2']:
                        # If the attributes are not contradictory - Flagged for manual review
                        if compare_atts in _NON_CONTRADICTORY:
                            kinds.append(kind_values['dir mismatch'])
                        # Else - Contradiction
                        else:
                            kinds.append(kind_values['dir contradiction'])
                    elif classify_scheme == '3':
                        kinds.append(kind_values['dir contradiction'])
                    else:
                        raise ValueError('Enter a right scheme number (1, 2, or 3).')
                # iIS = "I" and MI = "D"
                elif iis_cxn_type == "i" and mi_cxn_type != "i":
                    compare_atts = compare(model_atts, reading_atts)
                    if classify_scheme in ['1', '2']:
                        # If the attributes are not contradictory - Flagged for manual review
                        if compare_atts in _NON_CONTRADICTORY:
                            kinds.append(kind_values['dir mismatch'])
                        # Else - Contradiction
                        else:
                            kinds.append(kind_values['dir contradiction'])
                    elif classify_scheme == '3':
                        if compare_atts in _NON_CONTRADICTORY:
                            kinds.append(kind_values['dir contradiction'])
                        else:
                            kinds.append(kind_values['flagged4'])
                    else:
                        raise ValueError('Enter a right scheme number (1, 2, or 3).')
                # iIS = "D" and MI = "I"
                elif iis_cxn_type == "d" and mi_cxn_type == "i":
                    kinds.append(kind_values['dir contradiction'])

            # MI with Mismatched direction, Mismatched sign
            elif str(rev_opp_regs) != "nan" and target_listname in rev_opp_regs.split(','):
                position = rev_opp_regs.split(',').index(target_listname)
                mi_cxn_type = _model_connection_type(model_df, s_idx, opp_sign, position, mi_cxn)
                # List of model attributes to compare to reading attributes
                model_atts = get_attributes(s_idx, t_idx, opp_sign, model_df, attributes)

                # iIS = "D" and MI = "D"
                if iis_cxn_type == "d" and mi_cxn_type != "i":
                    compare_atts = compare(model_atts, reading_atts)
                    if classify_scheme in ['1', '2']:
                        # If the attributes are not contradictory - Flagged for manual review
                        if compare_atts in _NON_CONTRADICTORY:
                            kinds.append(kind_values['dir mismatch'])
                        # Else - Contradiction
                        else:
                            kinds.append(kind_values['dir contradiction'])
                    elif classify_scheme == '3':
                        if compare_atts in _NON_CONTRADICTORY:
                            kinds.append(kind_values['dir contradiction'])
                        else:
                            kinds.append(kind_values['dir mismatch'])
                    else:
                        raise ValueError('Enter a right scheme (1, 2, or 3).')
                # iIS = "D" and MI = "I"
                elif iis_cxn_type == "d" and mi_cxn_type == "i":
                    kinds.append(kind_values['dir contradiction'])
                # iIS = "I" and MI = "D"
                elif iis_cxn_type == "i" and mi_cxn_type != "i":
                    compare_atts = compare(model_atts, reading_atts)
                    # If the attributes are not contradictory - Flagged for manual review
                    if compare_atts in _NON_CONTRADICTORY:
                        kinds.append(kind_values['dir mismatch'])
                    # Else - Contradiction
                    else:
                        if classify_scheme in ['1', '2']:
                            kinds.append(kind_values['dir contradiction'])
                        elif classify_scheme == '3':
                            kinds.append(kind_values['flagged5'])
                # iIS = "I" and MI = "I"
                elif iis_cxn_type == "i" and mi_cxn_type == "i":
                    kinds.append(kind_values['dir contradiction'])

            else:
                # If there is a self-regulation (regulator is both target and source)
                if t_idx == s_idx:
                    # NOTE(review): assigned directly instead of appended to `kinds`, so it only
                    # survives when nothing is selected below, and it leaves `kinds` positions out
                    # of step with the (target, source) pairs assumed by
                    # _kind_score_to_model_int_id - confirm this is intended.
                    kind = kind_values['self-regulation']
                # If model does not contain interaction - check for path
                else:
                    kinds.append(path_finding(source_listname, target_listname, reg_sign, model_df,
                                              graph, kind_values, iis_cxn_type, reading_atts,
                                              attributes, classify_scheme))

    if len(kinds) == 1:
        kind = kinds[0]
        # Track every matched interaction that is classified as corroborated interaction or
        # contradicted interaction. The tracker part is added after kind value is assigned,
        # to avoid corruption of the functionality of original version.
        if counter is not None:
            if kind in [kind_values['strong corroboration'],
                        kind_values['empty attribute'],
                        kind_values['indirect interaction'],
                        kind_values['specification']]:
                _kind_score_to_model_int_id(x, kind, kinds, model_t_indices, model_s_indices,
                                            counter['corroboration'])
            elif int(kind) in [kind_values['dir contradiction'],
                               kind_values['sign contradiction'],
                               kind_values['att contradiction']]:
                # CS2 involves path for the category of contradiction; those kind scores are
                # strings instead of int and are converted here rather than counted.
                if classify_scheme == '2' and isinstance(kind, str):
                    kind = int(kind)
                else:
                    _kind_score_to_model_int_id(x, kind, kinds, model_t_indices, model_s_indices,
                                                counter['contradiction'])
    else:
        # Several (or no) classifications: the first category of the priority list that occurs wins
        for category, bucket in _KIND_PRIORITY:
            if category in _OPTIONAL_KINDS and category not in kind_values:
                continue
            value = kind_values[category]
            if bucket == 'contradiction':
                # Contradictions found through a path are stored as strings
                selected = value in kinds or str(value) in kinds
            else:
                selected = value in kinds
            if not selected:
                continue

            kind = value
            if bucket == 'corroboration' and counter is not None:
                _kind_score_to_model_int_id(x, kind, kinds, model_t_indices, model_s_indices,
                                            counter['corroboration'])
            elif bucket == 'contradiction':
                _track_contradiction(x, value, kinds, model_t_indices, model_s_indices,
                                     counter, classify_scheme)
            break

    if kind is None:
        raise ValueError('No Kind Score could be determined for reading row %s '
                         '(connection type %r).' % (x, iis_cxn_type))
    return kind
def epistemic_value(x: int, reading_df: pd.DataFrame) -> float:
    """
    Finds the epistemic value of the interaction in Interaction Set (IS) (when available).

    Parameters
    ----------
    x : int
        The row index for an iIS.
    reading_df : pd.DataFrame
        An IS dataframe.

    Returns
    -------
    e_value : float
        The value stored in the 'Epistemic Value' column for row ``x``;
        1 when the dataframe has no such column.
    """
    has_epistemic_column = 'Epistemic Value' in reading_df.columns
    return reading_df.loc[x, 'Epistemic Value'] if has_epistemic_column else 1
def score_reading(reading_df: pd.DataFrame,
                  model_df: pd.DataFrame,
                  graph: nx.DiGraph,
                  counter: dict=None,
                  kind_values: dict=None,
                  match_values: dict=None,
                  attributes: list=atts_list,
                  classify_scheme: str='1',
                  mi_cxn: str='d') -> pd.DataFrame:
    """
    This function creates new columns for the Match Score, Kind Score, Epistemic Value, and Total Score.
    It calls scoring functions and stores the values in the appropriate column.

    The Total Score of a row is
    ``((Evidence Score * Match Score) + Kind Score) * Epistemic Value``,
    so ``reading_df`` must contain an 'Evidence Score' column.

    Parameters
    ----------
    reading_df : pd.DataFrame
        The reading dataframe.
    model_df : pd.DataFrame
        The model dataframe.
    graph : nx.DiGraph
        directed graph of the model, necessary for calling kind_score module.
    counter: dict
        A dictionary for counting the corroborated and contradicted interaction.
        Default value is None, which skips the counting step.
    kind_values : dict
        Dictionary assigning Kind Score values.
        Default values found in KIND_DICT_A and KIND_DICT_B.
    match_values : dict
        Dictionary assigning Match Score values.
        Default values found in MATCH_DICT.
    attributes : list
        List of attributes compared between the model and the machine reading output.
        Default is the (empty) module-level ``atts_list``; None is treated as empty.
        The list itself is not modified.
    classify_scheme: str
        The scheme of the classification ('1', '2' or '3').
        Default value is '1'.
    mi_cxn : str
        Connection type assigned to model interactions if not available,
        "d" (direct) or "i" (indirect). Default is "d".

    Returns
    -------
    scored_reading_df : pd.DataFrame
        Copy of the reading dataframe with added scores.
    """
    assert (classify_scheme in ['1', '2', '3'])
    assert (mi_cxn in ['d', 'i'])
    if kind_values is None:
        kind_values = KIND_DICT_B if classify_scheme == '3' else KIND_DICT_A
    if match_values is None:
        match_values = MATCH_DICT
    # kind_score may extend the attribute list in place; hand it a private copy so that neither
    # the caller's list nor the shared module-level default is altered
    attributes = [] if attributes is None else list(attributes)

    # Create new DF columns for score calculations (explicit dtype: empty, filled row by row)
    scored_reading_df = reading_df.copy()
    for column in ('Match Score', 'Kind Score', 'Epistemic Value', 'Total Score'):
        scored_reading_df[column] = pd.Series(dtype=object)

    # Calculate scores; rows are addressed by label, so iterate over the index labels
    for x in reading_df.index:
        match = match_score(x, reading_df, model_df, match_values)
        kind = kind_score(x, model_df, reading_df, graph, counter, kind_values,
                          attributes, classify_scheme, mi_cxn)
        e_value = epistemic_value(x, reading_df)

        scored_reading_df.at[x, 'Match Score'] = match
        scored_reading_df.at[x, 'Kind Score'] = kind
        scored_reading_df.at[x, 'Epistemic Value'] = e_value
        # Kind scores coming from path finding may be strings, hence int()
        evidence = scored_reading_df.at[x, 'Evidence Score']
        scored_reading_df.at[x, 'Total Score'] = ((evidence * match) + int(kind)) * e_value

    return scored_reading_df