Source code for tdc.oracles

import pandas as pd
import numpy as np
import os, sys, json 
import warnings
warnings.filterwarnings("ignore")

from .utils import fuzzy_search, oracle_load, receptor_load
from .metadata import download_oracle_names, oracle_names, distribution_oracles, download_receptor_oracle_name, docking_target_info 


def _normalize_docking_score(raw_score):
    """Map a raw docking score onto a logistic curve between 0 and 1.

    Evaluates ``1 / (1 + exp(raw_score + 7.5))``: a raw score of -7.5 maps
    to 0.5, and lower (more negative) raw scores map closer to 1.

    Args:
        raw_score: the raw docking score; anything ``np.exp`` accepts.

    Returns:
        The squashed score (same shape as ``raw_score`` for array input).
    """
    shifted = raw_score + 7.5
    denominator = 1 + np.exp(shifted)
    return 1 / denominator

class Oracle:
    """Retrieve and evaluate an oracle selected by query name.

    The query name is fuzzy-matched against the known oracle names, any
    required model / receptor file is loaded, and the matching evaluator
    from ``chem_utils`` is bound to ``self.evaluator_func``. Instances are
    callable on a single SMILES string or on a list of SMILES strings.

    Args:
        name (str): the name of the oracle
        target_smiles (None, optional): target smiles for some meta-oracles
        num_max_call (None, optional): number of maximum calls for oracle,
            used by docking group
        **kwargs: additional parameters for some oracles
    """

    # Oracle name -> differently named symbol in ``chem_utils``.
    _RENAMED_EVALUATORS = {
        'logp': 'penalized_logp',
        'sa': 'SA',
    }

    # Oracles whose evaluator is the same-named ``chem_utils`` symbol, used
    # as-is (not instantiated).
    # NOTE(review): 'gsk3b' is assigned uninstantiated while 'jnk3' is
    # instantiated -- confirm that gsk3b is a plain callable.
    _SAME_NAME_EVALUATORS = frozenset((
        'qed', 'drd2', 'cyp3a4_veith', 'gsk3b',
        'celecoxib_rediscovery', 'troglitazone_rediscovery',
        'thiothixene_rediscovery',
        'aripiprazole_similarity', 'albuterol_similarity',
        'mestranol_similarity',
        'median1', 'median2',
        'osimertinib_mpo', 'fexofenadine_mpo', 'ranolazine_mpo',
        'perindopril_mpo', 'amlodipine_mpo', 'sitagliptin_mpo',
        'zaleplon_mpo',
        'valsartan_smarts',
        'deco_hop', 'scaffold_hop',
        'isomers_c7h8n2o2', 'isomers_c9h10n2o2pf2cl', 'isomers_c11h24',
        'askcos', 'ibm_rxn',
        'uniqueness', 'validity', 'diversity', 'novelty',
        'fcd_distance', 'kl_divergence',
    ))

    # Meta-oracles built as ``factory(target_smiles=..., **kwargs)``.
    _TARGET_META_EVALUATORS = frozenset((
        'similarity_meta', 'rediscovery_meta', 'isomer_meta',
    ))

    # Oracle name -> ``chem_utils`` factory built as ``factory(**kwargs)``.
    _KWARGS_EVALUATORS = {
        'molecule_one_synthesis': 'molecule_one_retro',
        'pyscreener': 'PyScreener_meta',
        'docking_score': 'Vina_smiles',
    }

    # Group oracles: name -> ordered (result key, ``chem_utils`` symbol) pairs.
    # The evaluator of a group is a dict keyed by the result keys.
    _GROUP_EVALUATORS = {
        'rediscovery': (
            ("Celecoxib", 'celecoxib_rediscovery'),
            ("Troglitazone", 'troglitazone_rediscovery'),
            ("Thiothixene", 'thiothixene_rediscovery'),
        ),
        'similarity': (
            ("Aripiprazole", 'aripiprazole_similarity'),
            ("Albuterol", 'albuterol_similarity'),
            ("Mestranol", 'mestranol_similarity'),
        ),
        'median': (
            ('Median 1', 'median1'),
            ('Median 2', 'median2'),
        ),
        'mpo': (
            ('Osimertinib', 'osimertinib_mpo'),
            ('Fexofenadine', 'fexofenadine_mpo'),
            ('Ranolazine', 'ranolazine_mpo'),
            ('Perindopril', 'perindopril_mpo'),
            ('Amlodipine', 'amlodipine_mpo'),
            ('Sitagliptin', 'sitagliptin_mpo'),
            ('Zaleplon', 'zaleplon_mpo'),
        ),
        'hop': (
            ('Deco Hop', 'deco_hop'),
            ('Scaffold Hop', 'scaffold_hop'),
        ),
        'isomers': (
            ('c7h8n2o2', 'isomers_c7h8n2o2'),
            ('c9h10n2o2pf2cl', 'isomers_c9h10n2o2pf2cl'),
        ),
    }

    # Receptors with built-in docking oracles, plus accepted name aliases.
    _DOCKING_PDBIDS = frozenset((
        '3pbl', '1iep', '2rgp', '3eml', '3ny8', '4rlu', '4unn', '5mo4', '7l11',
    ))
    _DOCKING_ALIASES = {'drd3': '3pbl'}

    def __init__(self, name, target_smiles=None, num_max_call=None, **kwargs):
        """Resolve the oracle name, load its resources and bind its evaluator.

        Args:
            name (str): query name; fuzzy-matched against ``oracle_names``
            target_smiles (None, optional): target smiles for meta-oracles
            num_max_call (None, optional): maximum number of molecule
                evaluations allowed; ``None`` disables the limit
            **kwargs: forwarded to the evaluator factories that accept them
        """
        self.target_smiles = target_smiles
        self.kwargs = kwargs
        self.normalize = lambda x: x  # identity unless a *_normalize oracle

        name = fuzzy_search(name, oracle_names)
        # 'drd3' is an alias of the '3pbl' receptor.
        if name == 'drd3_docking':
            name = '3pbl_docking'
        if name == 'drd3_docking_normalize':
            name = '3pbl_docking_normalize'

        if name in download_oracle_names:
            # e.g., jnk, gsk, drd2, ...
            self.name = oracle_load(name)
        elif name in download_receptor_oracle_name:
            # e.g., '1iep_docking', '2rgp_docking', '7l11_docking', ...
            pdbid = name.split('_')[0]
            self.name = receptor_load(pdbid)
            self.pdbid = self.name
            self.name += '_docking'
            if 'normalize' in name:
                self.name += '_normalize'
                self.normalize = _normalize_docking_score
        else:
            self.name = name

        self.evaluator_func = None
        self.assign_evaluator()
        self.num_called = 0
        self.num_max_call = num_max_call

    @staticmethod
    def _chem_util(symbol):
        """Lazily import ``symbol`` from the sibling ``chem_utils`` module.

        The import happens only when an oracle actually needs the symbol.

        Raises:
            ImportError: if ``chem_utils`` does not define ``symbol``.
        """
        from . import chem_utils
        try:
            return getattr(chem_utils, symbol)
        except AttributeError as err:
            raise ImportError(
                "cannot import name %r from %r" % (symbol, chem_utils.__name__)
            ) from err

    def assign_evaluator(self):
        """Assign the specific oracle function given by query oracle name.

        Sets ``self.default_property`` to 0.0 and, when ``self.name`` is a
        known oracle, binds ``self.evaluator_func`` to a callable or, for
        group oracles, to a dict of callables. Unknown names leave
        ``self.evaluator_func`` untouched.
        """
        self.default_property = 0.0
        name = self.name
        if not isinstance(name, str):
            return  # nothing can match; same outcome as an unknown name

        load = self._chem_util
        if name in self._RENAMED_EVALUATORS:
            self.evaluator_func = load(self._RENAMED_EVALUATORS[name])
        elif name in self._SAME_NAME_EVALUATORS:
            self.evaluator_func = load(name)
        elif name == 'jnk3':
            self.evaluator_func = load('jnk3')()
        elif name in self._TARGET_META_EVALUATORS:
            self.evaluator_func = load(name)(
                target_smiles=self.target_smiles, **self.kwargs)
        elif name == 'median_meta':
            self.evaluator_func = load('median_meta')(
                target_smiles_1=self.target_smiles[0],
                target_smiles_2=self.target_smiles[1],
                **self.kwargs)
        elif name in self._GROUP_EVALUATORS:
            self.evaluator_func = {
                label: load(symbol)
                for label, symbol in self._GROUP_EVALUATORS[name]
            }
        elif name in self._KWARGS_EVALUATORS:
            self.evaluator_func = load(self._KWARGS_EVALUATORS[name])(**self.kwargs)
        else:
            self._assign_docking_evaluator()

    def _assign_docking_evaluator(self):
        """Bind the docking evaluator for ``<pdbid>_docking[_vina|_normalize]``.

        Names that are not a built-in docking oracle are ignored.
        """
        prefix, _, suffix = self.name.partition('_')
        pdbid = self._DOCKING_ALIASES.get(prefix, prefix)
        if pdbid not in self._DOCKING_PDBIDS:
            return

        if suffix == 'docking_vina':
            factory = self._chem_util('Vina_smiles')
            center = docking_target_info[pdbid]['center']
            boxsize = docking_target_info[pdbid]['size']
            self.evaluator_func = factory(
                receptor_pdbqt_file='./oracle/' + pdbid + '.pdbqt',
                center=center,
                box_size=boxsize)
        elif suffix in ('docking', 'docking_normalize'):
            factory = self._chem_util('PyScreener_meta')
            center = docking_target_info[pdbid]['center']
            boxsize = docking_target_info[pdbid]['size']
            self.evaluator_func = factory(
                receptor_pdb_file='./oracle/' + pdbid + '.pdb',
                box_center=center,
                box_size=boxsize)

    def _reserve_calls(self, num_requested):
        """Add ``num_requested`` to the call counter, enforcing the budget.

        Raises:
            ValueError: if the limit would be exceeded; the counter is
                rolled back so that the failed request is not counted.
        """
        self.num_called += num_requested
        if self.num_max_call is not None and self.num_max_call < self.num_called:
            self.num_called -= num_requested
            raise ValueError(
                "The maximum number of evaluator call is reached! The maximum is: "
                + str(self.num_max_call)
                + '. The current requested call (plus accumulated calls) is: '
                + str(self.num_called + num_requested))

    def __call__(self, *args, **kwargs):
        """Call the oracle function on SMILES to generate scores.

        Args:
            *args: the first positional argument is a list of SMILES or a
                single SMILES string; the remaining ones are forwarded to
                the evaluator
            **kwargs: additional parameters for some oracles

        Returns:
            float/list/dict: the oracle score(s) for a single/list of
            SMILES. Group oracles return a dict with one entry per member.
            SMILES that RDKit cannot parse score ``self.default_property``
            and are not counted against the call budget.

        Raises:
            ValueError: reached number of maximum calls if set and has
                queried the oracle more than the internal call counter
        """
        if self.name in distribution_oracles:
            # 'novelty', 'diversity', 'uniqueness', 'validity',
            # 'fcd_distance', 'kl_divergence'
            return self.evaluator_func(*args, **kwargs)

        from rdkit import Chem

        smiles_lst = args[0]
        if self.name == 'molecule_one_synthesis':
            return self.evaluator_func(*args, **kwargs)

        extra_args = args[1:]

        if isinstance(smiles_lst, list):
            num_inputs = len(smiles_lst)
            valid_idx_lst, valid_smiles_lst = [], []
            for idx, smiles in enumerate(smiles_lst):
                if Chem.MolFromSmiles(smiles) is not None:
                    valid_idx_lst.append(idx)
                    valid_smiles_lst.append(smiles)

            self._reserve_calls(len(valid_smiles_lst))

            if isinstance(self.evaluator_func, dict):
                # NOTE(review): unlike the single-evaluator path below, these
                # lists cover only the valid SMILES and are therefore not
                # aligned with the input positions when some are invalid.
                return {
                    key: [fct(smiles, *extra_args, **kwargs)
                          for smiles in valid_smiles_lst]
                    for key, fct in self.evaluator_func.items()
                }

            if self.name != 'docking_score':
                results_lst = [
                    self.normalize(self.evaluator_func(smiles, *extra_args, **kwargs))
                    for smiles in valid_smiles_lst
                ]
            else:
                # The docking evaluator takes a list; evaluate one molecule
                # at a time so a single failure only affects its own score.
                results_lst = []
                for smiles in valid_smiles_lst:
                    try:
                        score = self.evaluator_func([smiles], *extra_args, **kwargs)[0]
                    except Exception:
                        score = self.default_property
                    results_lst.append(self.normalize(score))

            # Scatter the scores back to the original positions.
            all_results_lst = [self.default_property] * num_inputs
            for idx, result in zip(valid_idx_lst, results_lst):
                all_results_lst[idx] = result
            return all_results_lst

        # A single SMILES string.
        if Chem.MolFromSmiles(smiles_lst) is None:
            return self.default_property

        self._reserve_calls(1)

        if isinstance(self.evaluator_func, dict):
            return {key: fct(*args, **kwargs)
                    for key, fct in self.evaluator_func.items()}

        try:
            score = self.evaluator_func(*args, **kwargs)
        except Exception:
            # Best effort: a failing evaluator yields the default score.
            score = self.default_property
        return self.normalize(score)