# Source code for tdc.multi_pred.dti

# -*- coding: utf-8 -*-
# Author: TDC Team
# License: MIT

import warnings
import sys

from ..utils import print_sys
from . import bi_pred_dataset, multi_pred_dataset
from ..metadata import dataset_names

class DTI(bi_pred_dataset.DataLoader):
    """Data loader class to load datasets in Drug-Target Interaction Prediction task.

    More info: Regression task. Given the target amino acid sequence/compound
    SMILES string, predict their binding affinity.

    Args:
        name (str): the dataset name.
        path (str, optional): The path to save the data file, defaults to './data'
        label_name (str, optional): For multi-label dataset, specify the label
            name, defaults to None
        print_stats (bool, optional): Whether to print basic statistics of the
            dataset, defaults to False
    """

    # Datasets for which harmonize_affinities() is allowed.
    _HARMONIZABLE_DATASETS = ('bindingdb_ki', 'bindingdb_kd', 'bindingdb_ic50')

    # Maps the public ``mode`` value to the pandas aggregation applied to ``Y``.
    # 'max_affinity' keeps the *smallest* Y of each duplicated pair
    # (presumably a lower Ki/Kd/IC50 means a stronger binder -- confirm).
    _HARMONIZE_AGGREGATIONS = {'max_affinity': 'min', 'mean': 'mean'}

    def __init__(self, name, path='./data', label_name=None, print_stats=False):
        """Create Drug-Target Interaction Prediction dataloader object.

        Forwards all arguments to the base loader together with the list of
        known DTI dataset names, then labels the two entity columns as
        'Drug' and 'Target'.
        """
        super().__init__(name, path, label_name, print_stats,
                         dataset_names=dataset_names["DTI"])
        self.entity1_name = 'Drug'
        self.entity2_name = 'Target'
        self.two_types = True

        if print_stats:
            self.print_stats()

        # NOTE(review): indentation was lost in the flattened source; 'Done!'
        # is assumed to be emitted regardless of print_stats -- confirm.
        print('Done!', flush=True, file=sys.stderr)

    def harmonize_affinities(self, mode=None):
        """Remove duplicated drug-target pairs with different binding affinities.

        Rows returned by ``self.get_data()`` are grouped by
        ``Drug_ID``/``Drug``/``Target_ID``/``Target`` and the ``Y`` values of
        each group are collapsed into a single value.

        Args:
            mode (str): how duplicates are merged. ``'mean'`` averages the
                ``Y`` values of a pair; ``'max_affinity'`` keeps the minimum
                ``Y`` of a pair. There is no usable default: ``None`` raises.

        Returns:
            The aggregated table with one row per unique pair and the columns
            ``Drug_ID``, ``Drug``, ``Target_ID``, ``Target`` and ``Y``.

        Raises:
            ValueError: if the loaded dataset is not one of the supported
                BindingDB datasets, or if ``mode`` is not supported.
        """
        # NOTE(review): assumes the base DataLoader stores the resolved
        # (lower-case) dataset name on ``self.name`` -- confirm.
        if self.name not in self._HARMONIZABLE_DATASETS:
            raise ValueError(
                f'This function is not supported for {self.name}'
                ' because they are already duplicates removed!'
            )

        if mode not in self._HARMONIZE_AGGREGATIONS:
            raise ValueError(
                "Please specify 'mode' of removal, currently supported "
                "'mean'/'max_affinity'!"
            )

        pair_columns = ['Drug_ID', 'Drug', 'Target_ID', 'Target']
        # String aggregation names select the pandas implementation directly
        # and avoid the deprecation warning raised for ``min`` / ``np.mean``.
        aggregation = self._HARMONIZE_AGGREGATIONS[mode]
        return (
            self.get_data()
            .groupby(pair_columns)
            .Y.agg(aggregation)
            .reset_index()
        )