# Source code for violin.scoring

"""
scoring.py

Handles the Match Score, Kind Score, and Epistemic Value functions for VIOLIN
Created November 2019 - Casey Hansen MeLoDy Lab
Updated June 2025 - Haomiao Luo
"""

import pandas as pd
import networkx as nx
from violin.numeric import get_attributes, find_element, compare
from violin.network import path_finding
from typing import List, Union
import logging 

# Set up root-logger output at INFO level as a side effect of importing this module.
# NOTE(review): calling basicConfig at import time in a library module can interfere
# with the host application's own logging setup — confirm this is intended.
logging.basicConfig(level=logging.INFO)
# Logger named after this module; used for the diagnostic messages emitted below.
logger = logging.getLogger(__name__)


# Default kind score dict - categories
# KIND_DICT = {"strong corroboration" : 2,
#                 "empty attribute" : 1,
#                 "indirect interaction" : 1,
#                 "path corroboration" : 1,
#                 "hanging extension" : 40,
#                 "full extension" : 40,
#                 "internal extension" : 40,
#                 "specification" : 30,
#                 "dir contradiction" : 10,
#                 "sign contradiction" : 10,
#                 "att contradiction" : 10,
#                 "dir mismatch" : 20,
#                 "path mismatch" : 20,
#                 "self-regulation" : 20}

# Kind Score values selected by kind_score() for classification schemes '1' and '2'
KIND_DICT_A = {
    "strong corroboration": 2,
    "empty attribute": 1,
    "indirect interaction": 3,
    "path corroboration": 5,
    "specification": 7,
    "hanging extension": 40,
    "full extension": 39,
    "internal extension": 38,
    "dir contradiction": 11,
    "sign contradiction": 10,
    "att contradiction": 9,
    "dir mismatch": 20,
    "path mismatch": 19,
    "self-regulation": 18,
}

# Kind Score values selected by kind_score() for classification scheme '3':
# an independent copy of KIND_DICT_A extended with the two "flagged" subcategories
KIND_DICT_B = {
    **KIND_DICT_A,
    "flagged4": 17,
    "flagged5": 16,
}

# Default Match Score values, keyed by which interaction elements exist in the model
MATCH_DICT = {
    "source present": 1,
    "target present": 100,
    "both present": 10,
    "neither present": 0.1,
}

# Default attributes list is empty
atts_list = list()


def _reading_element_in_model(role: str, x: int, reading_df: pd.DataFrame, model_df: pd.DataFrame) -> bool:
    """
    Check whether one participant of a reading interaction is found in the model.

    The element is looked up with find_element by name, then by HGNC symbol, then by
    database ID. The checks short-circuit, so later lookups are skipped after the first hit.

    Parameters
    ----------
    role : str
        Column prefix of the participant in the reading dataframe ('Regulated' or 'Regulator')
    x : int
        A row index of the reading dataframe
    reading_df : pd.DataFrame
        The reading dataframe
    model_df : pd.DataFrame
        The model dataframe

    Returns
    -------
    bool
        True if any of the three lookups returns something other than -1
    """
    element_type = reading_df.loc[x, role + ' Type']

    return (find_element("name",
                         reading_df.loc[x, role + ' Name'],
                         element_type,
                         model_df) != -1 or
            find_element("hgnc",
                         reading_df.loc[x, role + ' HGNC Symbol'],
                         element_type,
                         model_df) != -1 or
            find_element("id",
                         reading_df.loc[x, role + ' ID'],
                         element_type,
                         model_df,
                         reading_df.loc[x, role + ' Database']) != -1)


def match_score(x:int, reading_df: pd.DataFrame, model_df: pd.DataFrame, match_values: dict=None) -> Union[int, float]:
    """
    This function calculates the Match Score for an interaction from the reading.

    The score only depends on which of the two interaction elements (regulator = source,
    regulated = target) can be found in the model; the interaction sign is not used.

    Parameters
    ----------
    x : int
        A row index of the dataframe of Interaction set (IS) to be scored
    reading_df : pd.DataFrame
        The reading dataframe
    model_df : pd.DataFrame
        The model dataframe
    match_values : dict
        Dictionary assigning Match Score values, with the keys 'source present',
        'target present', 'both present' and 'neither present'
        Default values found in MATCH_DICT

    Returns
    -------
    match : int or float
        Match Score value (the default 'neither present' value is the float 0.1)
    """
    if match_values is None:
        match_values = MATCH_DICT

    # Search for regulated (target) and regulator (source) from reading in model
    regulated = _reading_element_in_model('Regulated', x, reading_df, model_df)
    regulator = _reading_element_in_model('Regulator', x, reading_df, model_df)

    # Scoring definition
    if regulated and regulator:
        # Both regulator and regulated in the model
        match_key = 'both present'
    elif regulator:
        # Only the regulator in the model
        match_key = 'source present'
    elif regulated:
        # Only the regulated in the model
        match_key = 'target present'
    else:
        # Neither present in the model
        match_key = 'neither present'

    return match_values[match_key]

def _kind_score_to_model_int_id(row_idx: int,
                           query_kind: int, 
                           kinds_list: List[Union[str, int]], 
                           model_t_indices: List[int], 
                           model_s_indices: List[int],
                           counter_list: List[str],
                           search_point: int=0,
                           count_path:bool=False) -> str:
    """Function to record the interaction ID of the model that is classified. The function recursively records the unique IDs
    Parameters
    ----------
    row_idx: int
        The row index of the interaction to be classified.
    query_kind : int
        The kind score to be classified.
    kinds_list : list
        The list of kind scores.
    model_t_indices : list
        The model target indices.   
    model_s_indices : list
        The model source indices.
    counter_list: list
        A list to record the interactions that are identified as corroborated or contradicted interaction in model.
    search_point : int
        The point to start searching in the list of kind scores.
    count_path: bool
        An indicator whether to count the path or not. Default is False, because the path should not be an interaction in model.
    
    Returns
    -------
    str
        The interaction ID of the model that is classified.
    """
    # TODO: Implement path index finding for UI
    if count_path:
        raise NotImplementedError("Path finding is not implemented yet.")
    
    # Confirm that queried kind is in the list of kinds, starting from the srearch point
    if query_kind not in kinds_list[search_point:]:
        logger.info("%s: The queried kind %s is either not in the list of kinds or counted already." % (row_idx, query_kind))
        # Exit function
        return
    
    # Find the index of the queried kind in the list of kinds
    current_kind_idx = kinds_list.index(query_kind, search_point)

    # Find the corresponding source and target indices in the model
    source_found = model_s_indices[current_kind_idx % len(model_s_indices)]
    target_found = model_t_indices[current_kind_idx // len(model_s_indices)]
    
    # Check if the interaction ID is already in the counter
    if '%s+%s' % (source_found, target_found) not in counter_list:
        counter_list.append('%s+%s' % (source_found, target_found))
        return 
    
    # If the interaction ID is already in the counter, check next kind is same as queried kind
    else:
        _kind_score_to_model_int_id(row_idx,
                               query_kind, 
                               kinds_list, 
                               model_t_indices, 
                               model_s_indices, 
                               counter_list, 
                               search_point=current_kind_idx + 1)
        
        
    
[docs]def kind_score(x: int,
               model_df: pd.DataFrame,
               reading_df: pd.DataFrame,
               graph: nx.DiGraph,
               counter: dict,
               kind_values: dict=None,
               attributes: list=None,
               classify_scheme: str='1',
               mi_cxn: str='d') -> int:
    """
    This function calculates the Kind Score for an interaction in the Interactions Set (iIS).
    The kind score will be used to represent the subcategories.
    For further details, please find out in: https://www.biorxiv.org/content/10.1101/2024.07.21.604448v1.

    Parameters
    ----------
    x : int
        The row index for an iIS.
    model_df: pd.DataFrame
        The model dataframe
    reading_df : pd.DataFrame
        The reading dataframe.
    graph : nx.DiGraph
        A directed graph of the model,used when function calls path_finding module.
    counter: dict
        A dictionary to record the interactions that are identified as corroborated or contradicted interaction in model.
        Default value is None.
    kind_values : dict
        Dictionary assigning Kind Score values.
        Default values found in KIND_DICT_A and KIND_DICT_B.
    attributes : list
        A list of attributes compared between the model and the machine reading output.
        Default is None.
    classify_scheme: str
        The scheme of the classification ('1', '2', and '3').
        Default is '1'.
    mi_cxn : str
        What connection type should be assigned to model interactions if not available.
        Accepted values are "d" (direct) or "i" (indirect).
        Deafult is "d".

    Returns
    -------
    kind : int
        Kind Score, score value.
    """
    # Initialize the parameters
    global MATCH_DICT

    assert (classify_scheme in ['1', '2', '3'])

    if kind_values is None:
        if classify_scheme in ['1', '2']:
            kind_values = KIND_DICT_A

        elif classify_scheme == '3':
            kind_values = KIND_DICT_B
    else:
        pass

    if attributes is None:
        match_score = []

    assert (mi_cxn in ['d', 'i'])

    ### Finding interaction in Interactions Set (iIS) attributes ###
    # Finding iIS regulator sign
    signs = ['Negative', 'Positive']
    if reading_df.loc[x, 'Sign'].lower() in ['activate', 'positive', 'increase']: reg_sign = 'Positive'
    else: reg_sign = 'Negative'
    signs.remove(reg_sign)
    opp_sign = signs[0]

    # Finding iIS Connection Type (if not in iIS input, default to indirect, 'i')
    if 'Connection Type' in reading_df.columns: iis_cxn_type = reading_df.loc[x, 'Connection Type']
    else: iis_cxn_type = 'i'

    # Add full location information, if user want to compare location of the element
    if 'Regulated Compartment' in attributes and 'Regulated Compartment ID' not in attributes:
        attributes.insert(attributes.index('Regulated Compartment') + 1, 'Regulated Compartment ID')
    elif 'Regulated Compartment ID' in attributes and 'Regulated Compartment' not in attributes:
        attributes.insert(attributes.index('Regulated Compartment ID'), 'Regulated Compartment')
    else:
        pass

    if 'Regulator Compartment' in attributes and 'Regulator Compartment ID' not in attributes:
        attributes.insert(attributes.index('Regulator Compartment') + 1, 'Regulator Compartment ID')
    elif 'Regulator Compartment ID' in attributes and 'Regulator Compartment' not in attributes:
        attributes.insert(attributes.index('Regulator Compartment ID'), 'Regulator Compartment')
    else:
        pass

    # Create list for attributes (i.e., location attributes, context attributes, influence attributes)
    reading_atts = attributes

    # Finding iIS other attributes
    if len(attributes) > 0:
        # Attributes for iIS index 'x'
        reading_atts = {att: reading_df.loc[x, att] for att in reading_atts}
    else:
        reading_atts = {}

    # Comparing to model
    source_hgnc = find_element("hgnc",
                               reading_df.loc[x, 'Regulator HGNC Symbol'],
                               reading_df.loc[x, 'Regulator Type'],
                               model_df)
    source_name = find_element("name",
                               reading_df.loc[x, 'Regulator Name'],
                               reading_df.loc[x, 'Regulator Type'],
                               model_df)
    source_id = find_element("id",
                             reading_df.loc[x, 'Regulator ID'],
                             reading_df.loc[x, 'Regulator Type'],
                             model_df,
                             reading_df.loc[x, 'Regulator Database'])

    target_hgnc = find_element("hgnc",
                               reading_df.loc[x, 'Regulated HGNC Symbol'],
                               reading_df.loc[x, 'Regulated Type'],
                               model_df)
    target_name = find_element("name",
                               reading_df.loc[x, 'Regulated Name'],
                               reading_df.loc[x, 'Regulated Type'],
                               model_df)
    target_id = find_element("id",
                             reading_df.loc[x, 'Regulated ID'],
                             reading_df.loc[x, 'Regulated Type'],
                             model_df,
                             reading_df.loc[x, 'Regulated Database'])

    # Both regulator (source) and regulated (target) node found in the model
    if (source_name != -1 or source_hgnc != -1 or source_id != -1) and \
            (target_name != -1 or target_hgnc != -1 or target_id != -1):
        # Find indices of regulator element (target) in model
        #FIXME: TBD for order
        # Privilege: HGNC > Name > ID
        if source_hgnc != -1: model_s_indices = source_hgnc
        elif source_name != -1: model_s_indices = source_name
        else: model_s_indices = source_id

        if target_hgnc != -1: model_t_indices = target_hgnc
        elif target_name != -1: model_t_indices = target_name
        else: model_t_indices = target_id

        kinds = []
        # print(f"s:{model_s_indices}, t:{model_t_indices}")
        # Loop over each instance of the target and source in the model (since the same element may exist multiple status
        for t_idx in model_t_indices:
            # Regulator list in model
            model_s_list = model_df.loc[t_idx, reg_sign+' Regulator List']  \
                if model_df.loc[t_idx, reg_sign+' Regulator List'] != 'nan' else 'nan'
            # Regulator list of opposite sign
            model_s_opp = model_df.loc[t_idx, opp_sign+' Regulator List'] \
                if model_df.loc[t_idx, opp_sign+' Regulator List'] != 'nan' else 'nan'

            for s_idx in model_s_indices:

                source_listname = model_df.loc[s_idx, 'Listname']

                target_listname = model_df.loc[t_idx, 'Listname']

                # MI with match direction, match sign
                if str(model_s_list) != "nan" and source_listname in model_s_list.split(','):
                    # Index of regulator name within regulator list
                    s_index = model_s_list.split(',').index(source_listname)
                    # Finding index MI regulator variable
                    model_s_variable = model_df.loc[t_idx,reg_sign+' Regulator List'].split(',')[s_index]
                    #model_s_element = list(model_df['Variable']).index(model_s_variable)

                    # Find MI connection type
                    if (reg_sign+' Connection Type List') in model_df.columns.values.tolist() and \
                            all(cxn_type.lower().strip() in ['i', 'd'] for cxn_type in
                                                        model_df.loc[t_idx, reg_sign+' Connection Type List'].split(',')):
                        # Connection type
                        mi_cxn_type = model_df.loc[t_idx,reg_sign+' Connection Type List'].split(",")[s_index]
                    else: mi_cxn_type = mi_cxn

                    # List of model attributes to compare to reading attributes
                    model_atts = get_attributes(t_idx, s_idx, reg_sign, model_df, attributes)


                    # If iIS ="I" and MI = "I" or iIS = "D" and MI = "D": check attributes
                    if (iis_cxn_type == "i" and mi_cxn_type == "i") or (iis_cxn_type == "d" and mi_cxn_type != "i"):

                        compare_atts = compare(model_atts, reading_atts)
                        # Strong Corroboration - perfect match
                        if compare_atts == 0:
                            kinds.append(kind_values['strong corroboration'])
                        # Weak corroboration - the iIS presents less information than the model interaction
                        elif compare_atts == 1:
                            kinds.append(kind_values['empty attribute'])
                        # Specification - the iIS presents new information
                        elif compare_atts == 2:
                            kinds.append(kind_values['specification'])
                        # Contradiction - the iIS presents information that disputes the model interaction
                        elif compare_atts == 3:
                            kinds.append(kind_values['att contradiction'])

                    # If iIS = "D" and MI = "I"
                    elif iis_cxn_type == "d" and mi_cxn_type == "i":
                        compare_atts = compare(model_atts, reading_atts)
                        # If attributes are non-contradictory: iIS is a specification
                        if compare_atts in [0,1,2]: kinds.append(kind_values['specification'])
                        # Else: iIS is a contradiction
                        elif compare_atts == 3: kinds.append(kind_values['att contradiction'])

                    # If iIS ="I" and MI = "D":
                    elif iis_cxn_type == "i" and mi_cxn_type == "d":
                        compare_atts = compare(model_atts, reading_atts)
                        #If attributes are non-contradictory: iIS is a weak corroboration
                        if compare_atts in [0,1,2]: kinds.append(kind_values['indirect interaction'])
                        #Else: iIS is a contradiction
                        elif compare_atts == 3: kinds.append(kind_values['att contradiction'])

                # MI with Matched direction, Mismatched sign
                elif str(model_s_opp) != "nan" and source_listname in model_s_opp.split(','):
                    reg_index = model_df.loc[t_idx, opp_sign + " Regulator List"].split(',').index(
                        source_listname)
                    # Finding connection type
                    if (reg_sign+' Connection Type List') in model_df.columns.values.tolist() and \
                            all(cxn_type.lower().strip() in ['i', 'd'] for cxn_type in
                                                        model_df.loc[t_idx, opp_sign+' Connection Type List'].split(',')):
                        #Connection type
                        mi_cxn_type = model_df.loc[t_idx, opp_sign + ' Connection Type List'].split(",")[reg_index]
                    else: mi_cxn_type = mi_cxn
                    # If iIS = "I" and MI = "D"
                    if iis_cxn_type == "i" and mi_cxn_type != "i":
                        if classify_scheme in ['1', '2']:
                            kinds.append(kind_values['sign contradiction'])

                        elif classify_scheme == '3':
                            kinds.append(kind_values['flagged5'])

                    else:
                        #iIS is a Sign Contradiction, regardless of connection type
                        kinds.append(kind_values['sign contradiction'])

                # MI with Mismatched direction, Matched sign
                elif (model_df.loc[s_idx, reg_sign + " Regulator List"] != "nan" and target_listname
                      in model_df.loc[s_idx, reg_sign + " Regulator List"].split(',')):
                    reg_index = model_df.loc[s_idx, reg_sign + " Regulator List"].split(',').index(
                        target_listname)
                    # Finding connection type
                    if (reg_sign + ' Connection Type List') in model_df.columns.values.tolist() and \
                            all(cxn_type.lower().strip() in ['i', 'd'] for cxn_type in
                                                        model_df.loc[s_idx, reg_sign+' Connection Type List'].split(',')):
                        # Connection type
                        mi_cxn_type = model_df.loc[s_idx, reg_sign + ' Connection Type List'].split(",")[reg_index]
                    else:
                        mi_cxn_type = mi_cxn

                    # Finding index MI regulator variable
                    model_reg_variable = model_df.loc[s_idx, reg_sign + ' Regulator List'].split(',')[reg_index]
                    #model_reg_element = list(model_df['Variable']).index(model_reg_variable)
                    # List of model attributes to compare to reading attributes

                    model_atts = get_attributes(s_idx, t_idx, reg_sign, model_df, attributes)

                    # iIS = "I" and MI = "I"
                    if iis_cxn_type == "i" and mi_cxn_type == "i":
                        kinds.append(kind_values['dir contradiction'])

                    # iIS = "D" and MI = "D"
                    elif iis_cxn_type == "d" and mi_cxn_type != "i":
                        compare_atts = compare(model_atts, reading_atts)
                        if classify_scheme in ['1', '2']:
                            # If the attributes are not contradictory - Flagged for manual review
                            if compare_atts in [0, 1, 2]:
                                kinds.append(kind_values['dir mismatch'])

                            # Else - Contradiction
                            else:
                                kinds.append(kind_values['dir contradiction'])
                        elif classify_scheme == '3':
                            kinds.append(kind_values['dir contradiction'])

                        else:
                            raise ValueError('Enter a right scheme number (1, 2, or 3).')

                    # iIS = "I" and MI = "D"
                    elif iis_cxn_type == "i" and mi_cxn_type != "i":
                        compare_atts = compare(model_atts, reading_atts)
                        if classify_scheme in ['1', '2']:
                            # If the attributes are not contradictory - Flagged for manual review
                            if compare_atts in [0, 1, 2]:
                                kinds.append(kind_values['dir mismatch'])

                            # Else - Contradiction
                            else:
                                kinds.append(kind_values['dir contradiction'])
                        elif classify_scheme == '3':
                            if compare_atts in [0, 1, 2]:
                                kinds.append(kind_values['dir contradiction'])

                            else:
                                kinds.append(kind_values['flagged4'])

                        else:
                            raise ValueError('Enter a right scheme number (1, 2, or 3).')

                    # iIS = "D" and MI = "I"
                    elif iis_cxn_type == "d" and mi_cxn_type == "i":
                        kinds.append(kind_values['dir contradiction'])

                #MI with Mismatched direction, Mismatched sign
                elif (model_df.loc[s_idx,opp_sign+" Regulator List"] != "nan" and target_listname in
                      model_df.loc[s_idx,opp_sign+" Regulator List"].split(',')):
                    reg_index = model_df.loc[s_idx,opp_sign+" Regulator List"].split(',').index(target_listname)
                    #Finding connection type
                    if (opp_sign+' Connection Type List') in model_df.columns.values.tolist()and \
                            all(cxn_type.lower().strip() in ['i', 'd'] for cxn_type in
                                                        model_df.loc[s_idx, opp_sign+' Connection Type List'].split(',')):
                        mi_cxn_type = model_df.loc[s_idx,opp_sign+' Connection Type List'].split(",")[reg_index]
                    else: mi_cxn_type = mi_cxn

                    #Finding index MI regulator variable
                    model_reg_variable = model_df.loc[s_idx,opp_sign+' Regulator List'].split(',')[reg_index]
                    #model_reg_element = list(model_df['Variable']).index(model_reg_variable)

                    #List of model attributes to compare to reading attributes
                    model_atts = get_attributes(s_idx, t_idx, opp_sign, model_df, attributes)

                    # iIS = "D" and MI = "D"
                    if iis_cxn_type == "d" and mi_cxn_type != "i":
                        compare_atts = compare(model_atts, reading_atts)
                        if classify_scheme in ['1', '2']:
                            #If the attributes are not contradictory - Flagged for manual review
                            if compare_atts in [0,1,2]:
                                kinds.append(kind_values['dir mismatch'])

                            #Else - Contradiction
                            else: kinds.append(kind_values['dir contradiction'])

                        elif classify_scheme == '3':
                            if compare_atts in [0,1,2]:
                                kinds.append(kind_values['dir contradiction'])

                            else:
                                kinds.append(kind_values['dir mismatch'])

                        else:
                            raise ValueError('Enter a right scheme (1, 2, or 3).')
                    # iIS = "D" and MI = "i"
                    elif iis_cxn_type == "d" and mi_cxn_type == "i": kinds.append(kind_values['dir contradiction'])
                    # iIS = "i" and MI = "D"
                    elif iis_cxn_type == "i" and mi_cxn_type != "i":
                        compare_atts = compare(model_atts, reading_atts)
                        #If the attributes are not contradictory - Flagged for manual review
                        if compare_atts in [0, 1, 2]: kinds.append(kind_values['dir mismatch'])
                        #Else - Contradiction
                        else:
                            if classify_scheme in ['1', '2']:
                                kinds.append(kind_values['dir contradiction'])


                            elif classify_scheme == '3':
                                kinds.append(kind_values['flagged5'])

                    # iIS = "i" and MI = "i"
                    elif iis_cxn_type == "i" and mi_cxn_type == "i": kinds.append(kind_values['dir contradiction'])

                else:
                    # If there is a self-regulation (regulator is both target and source)
                    if t_idx == s_idx:
                        kind = kind_values['self-regulation']
                    # If model does not contain interaction - check for path
                    else:
                        kinds.append(path_finding(source_listname,target_listname,reg_sign,model_df,graph,kind_values,iis_cxn_type,reading_atts,attributes,classify_scheme))


        if len(kinds) == 1:
            kind = kinds[0]
            if counter is None:
                pass
            # Track every matched interaction that is classified as corroborated interaction or contradicted interaction
            # The tracker part is added after kind value is signed, to avoid corruption of the functionality of original version.
            else:
                if kind in [kind_values['strong corroboration'],
                            kind_values['empty attribute'],
                            kind_values['indirect interaction'],
                            kind_values['specification']]:
                    
                    _kind_score_to_model_int_id(
                        x, kind, kinds, model_t_indices, model_s_indices, counter['corroboration']
                    )

                elif int(kind) in [kind_values['dir contradiction'],
                            kind_values['sign contradiction'],
                            kind_values['att contradiction']]:
                    if classify_scheme == '2':
                        # CS2 involves path for the category of contradiction, 
                        # For CS2, all the kind scores that are classified in `finding_path` function are converted to strings instead of int.
                        if type(kind) == str:
                            kind = int(kind)
                        else:
                            _kind_score_to_model_int_id(
                                x, kind, kinds, model_t_indices, model_s_indices, counter['contradiction']
                            )

                    else:
                        _kind_score_to_model_int_id(
                            x, kind, kinds, model_t_indices, model_s_indices, counter['contradiction']
                        )

                else:
                    pass

        # Strong Corroboration
        elif kind_values['strong corroboration'] in kinds:
            kind = kind_values['strong corroboration']
            if counter is None:
                pass
            # Track every matched interaction that is classified as corroborated interaction or contradicted interaction
            else:
                _kind_score_to_model_int_id(
                    x, kind, kinds, model_t_indices, model_s_indices, counter['corroboration'],
                )

        # Weak Corroboration
        elif kind_values['empty attribute'] in kinds:
            kind = kind_values['empty attribute']
            if counter is None:
                pass
            # Track every matched interaction that is classified as corroborated interaction or contradicted interaction
            else:

                _kind_score_to_model_int_id(
                    x, kind, kinds, model_t_indices, model_s_indices, counter['corroboration']
                )

        elif kind_values['indirect interaction'] in kinds:
            kind = kind_values['indirect interaction']
            if counter is None:
                pass
            # Track every matched interaction that is classified as corroborated interaction or contradicted interaction
            else:
                _kind_score_to_model_int_id(
                    x, kind, kinds, model_t_indices, model_s_indices, counter['corroboration']
                )

        elif kind_values['path corroboration'] in kinds:
            kind = kind_values['path corroboration']
        elif kind_values['specification'] in kinds:
            kind = kind_values['specification']
            if counter is None:
                pass
            # Track every matched interaction that is classified as corroborated interaction or contradicted interaction
            else:
                _kind_score_to_model_int_id(
                    x, kind, kinds, model_t_indices, model_s_indices, counter['corroboration']
                )

        # Contradiction
        elif kind_values['dir contradiction'] in kinds or str(kind_values['dir contradiction']) in kinds:
            kind = kind_values['dir contradiction']
            if counter is None:
                pass
            # Track every matched interaction that is classified as corroborated interaction or contradicted interaction
            else:
                if classify_scheme == '2':
                    kind_value = [x for x in kinds if
                                   x in [kind_values['dir contradiction'], str(kind_values['dir contradiction'])]]
                    for _ in kind_value:
                        if type(_) == str:
                            pass
                        else:
                            _kind_score_to_model_int_id(
                                x, _, kinds, model_t_indices, model_s_indices, counter['contradiction']
                            )

                            break
                else:
                    _kind_score_to_model_int_id(
                        x, kind, kinds, model_t_indices, model_s_indices, counter['contradiction']
                    )

        elif kind_values['sign contradiction'] in kinds or str(kind_values['sign contradiction']) in kinds:
            kind = kind_values['sign contradiction']
            if counter is None:
                pass
            # Track every matched interaction that is classified as corroborated interaction or contradicted interaction
            else:
                if classify_scheme == '2':
                    kind_value = [x for x in kinds if
                                   x in [kind_values['sign contradiction'], str(kind_values['sign contradiction'])]]
                    for _ in kind_value:
                        if type(_) == str:
                            pass
                        else:
                            _kind_score_to_model_int_id(
                                x, _, kinds, model_t_indices, model_s_indices, counter['contradiction']
                            )
                            break
                else:
                    _kind_score_to_model_int_id(
                        x, kind, kinds, model_t_indices, model_s_indices, counter['contradiction']
                    )

        elif kind_values['att contradiction'] in kinds or str(kind_values['att contradiction']) in kinds:
            kind = kind_values['att contradiction']
            if counter is None:
                pass
            # Track every matched interaction that is classified as corroborated interaction or contradicted interaction
            else:
                if classify_scheme == '2':
                    kind_value = [x for x in kinds if x in [kind_values['att contradiction'], str(kind_values['att contradiction'])]]
                    for _ in kind_value:
                        if type(_) == str:
                            pass
                        else:
                            _kind_score_to_model_int_id(
                                x, _, kinds, model_t_indices, model_s_indices, counter['contradiction']
                            )
                            break
                else:
                    _kind_score_to_model_int_id(
                        x, kind, kinds, model_t_indices, model_s_indices, counter['contradiction']
                    )

        # Extensions
        elif kind_values['hanging extension'] in kinds:
            kind = kind_values['hanging extension']
        elif kind_values['internal extension'] in kinds:
            kind = kind_values['internal extension']
        elif kind_values['full extension'] in kinds:
            kind = kind_values['full extension']

        # Flagged
        elif kind_values['dir mismatch'] in kinds:
            kind = kind_values['dir mismatch']
        elif kind_values['path mismatch'] in kinds:
            kind = kind_values['path mismatch']
        elif kind_values['self-regulation'] in kinds:
            kind = kind_values['self-regulation']
        # check if the classify scheme is version 3
        elif 'flagged4' in kind_values:
            if kind_values['flagged4'] in kinds:
                kind = kind_values['flagged4']
        elif 'flagged5' in kind_values:
            if kind_values['flagged5'] in kinds:
                kind = kind_values['flagged5']
        else: pass

    # Both Extension - Both nodes from reading not in model
    elif (source_id == -1 and source_name == -1 and source_hgnc == -1) and (target_id == -1 and target_name == -1 and target_hgnc == -1):
        kind = kind_values['full extension']
    # Hanging Extension - One from reading not in model
    else: kind = kind_values['hanging extension']

    return kind


def epistemic_value(x: int,reading_df: pd.DataFrame) -> float:
    """
    Look up the Epistemic Value of one interaction in the Interaction Set (IS).

    Parameters
    ----------
    x : int
        Row label of the interaction in the IS dataframe (used with ``.loc``).
    reading_df : pd.DataFrame
        The IS dataframe.

    Returns
    -------
    e_value : float
        The content of the 'Epistemic Value' column at row ``x``, returned as
        stored; if the dataframe has no such column, the default 1 is returned.
    """

    # No epistemic information in this reading: every interaction weighs 1.
    if 'Epistemic Value' not in reading_df.columns:
        return 1

    return reading_df.loc[x, 'Epistemic Value']


def score_reading(reading_df: pd.DataFrame,
                model_df: pd.DataFrame,
                graph:nx.DiGraph,
                counter: dict=None,
                kind_values: dict=None,
                match_values: dict=None,
                attributes: list=atts_list,
                classify_scheme: str='1',
                mi_cxn: str='d') -> pd.DataFrame:
    """
    Score every interaction of a reading against the model.

    A copy of the reading dataframe is extended with the columns
    'Match Score', 'Kind Score', 'Epistemic Value' and 'Total Score'.
    For each row the scoring functions are called and the total is computed as
    ``(Evidence Score * Match Score + int(Kind Score)) * Epistemic Value``.
    The input ``reading_df`` itself is not modified by this function.

    Parameters
    ----------
    reading_df : pd.DataFrame
        The reading dataframe. It must contain an 'Evidence Score' column and
        is addressed with the row labels ``0 .. len(reading_df) - 1``.
    model_df : pd.DataFrame
        The model dataframe.
    graph : nx.DiGraph
        Directed graph of the model, forwarded to ``kind_score``.
    counter : dict
        Dictionary forwarded to ``kind_score`` for counting corroborated and
        contradicted interactions. Default is None.
    kind_values : dict
        Dictionary assigning Kind Score values. When None, KIND_DICT_A is used
        for classification schemes '1' and '2', KIND_DICT_B for scheme '3'.
    match_values : dict
        Dictionary assigning Match Score values. When None, MATCH_DICT is used.
    attributes : list
        List of attributes compared between the model and the machine reading
        output. Default is the module-level ``atts_list``.
    classify_scheme : str
        The scheme of the classification; one of '1', '2' or '3'.
        Default value is '1'.
    mi_cxn : str
        Either 'd' or 'i'; forwarded unchanged to ``kind_score``.
        Default value is 'd'.

    Returns
    -------
    scored_reading_df : pd.DataFrame
        Copy of the reading dataframe with the added score columns.
    """
    assert classify_scheme in ['1', '2', '3']
    assert mi_cxn in ['d', 'i']

    # Fall back to the default Kind Score table that matches the scheme.
    if kind_values is None:
        if classify_scheme == '3':
            kind_values = KIND_DICT_B
        elif classify_scheme in ('1', '2'):
            kind_values = KIND_DICT_A

    match_values = MATCH_DICT if match_values is None else match_values

    # Work on a copy and (re)create the score columns as empty columns.
    scored_reading_df = reading_df.copy()
    for score_column in ('Match Score', 'Kind Score', 'Epistemic Value', 'Total Score'):
        scored_reading_df[score_column] = pd.Series()

    # Fill in the scores row by row; the helpers read from the original reading.
    for row in range(len(reading_df)):
        scored_reading_df.at[row, 'Match Score'] = match_score(
            row, reading_df, model_df, match_values
        )
        scored_reading_df.at[row, 'Kind Score'] = kind_score(
            row, model_df, reading_df, graph, counter,
            kind_values, attributes, classify_scheme, mi_cxn
        )
        scored_reading_df.at[row, 'Epistemic Value'] = epistemic_value(row, reading_df)

        # Read the stored values back so the total uses exactly what the
        # dataframe holds after assignment.
        evidence = scored_reading_df.at[row, 'Evidence Score']
        match = scored_reading_df.at[row, 'Match Score']
        kind = int(scored_reading_df.at[row, 'Kind Score'])
        epistemic = scored_reading_df.at[row, 'Epistemic Value']
        scored_reading_df.at[row, 'Total Score'] = (evidence * match + kind) * epistemic

    return scored_reading_df