# -*- coding: utf-8 -*-
# Author: TDC Team
# License: MIT
import numpy as np
# RDKit is a hard requirement of this module: fail at import time with an
# installation hint when it is missing.
try:
    from rdkit import rdBase
except ImportError as err:
    # Catch only ImportError so unrelated failures (or Ctrl-C) are not
    # misreported as a missing installation; keep the original cause attached.
    raise ImportError(
        "Please install rdkit by 'conda install -c conda-forge rdkit'! "
    ) from err
else:
    # Turn off RDKit's "rdApp.error" log channel for the whole process.
    rdBase.DisableLog("rdApp.error")
from ...utils import print_sys, install
class MolFilter:
    """Molecule filter: keep the molecules that satisfy user-specified conditions.

    The filter combines the structural-alert rule sets shipped with the
    ``rd_filters`` package with optional range checks on six molecular
    properties. The rule sets are compiled once in the constructor; calling
    the instance evaluates SMILES strings against them.

    Args:
        filters: ``"all"``, the name of one rule set, or an iterable of rule
            set names. Valid names are 'BMS', 'Dundee', 'Glaxo',
            'Inpharmatica', 'LINT', 'MLSMR', 'PAINS' and 'SureChEMBL'.
        property_filters_flag: bool, whether the property ranges below are
            applied in addition to the structural alerts.
        HBA: [lower_bound, upper_bound]
        HBD: [lower_bound, upper_bound]
        LogP: [lower_bound, upper_bound]
        MW: [lower_bound, upper_bound], molecular weight
        Rot: [lower_bound, upper_bound]
        TPSA: [lower_bound, upper_bound]

    Returns:
        Calling the instance returns a numpy array with the SMILES strings
        that pass the filter.

    Raises:
        ValueError: if ``filters`` contains an unknown rule set name, or if
            the instance is called with an unsupported input type.
    """

    # MIT License: Checkout https://github.com/PatWalters/rd_filters

    # Names of the rule sets a caller may select.
    _ALL_FILTERS = (
        "BMS",
        "Dundee",
        "Glaxo",
        "Inpharmatica",
        "LINT",
        "MLSMR",
        "PAINS",
        "SureChEMBL",
    )

    # ``rule_dict`` keys that hold property ranges, in constructor order.
    _PROPERTY_KEYS = ("HBA", "HBD", "LogP", "MW", "Rot", "TPSA")

    # Labels given to the nine fields of each row returned by ``rf.evaluate``.
    _RESULT_COLUMNS = (
        "SMILES",
        "NAME",
        "FILTER",
        "MW",
        "LogP",
        "HBD",
        "HBA",
        "TPSA",
        "Rot",
    )

    def __init__(
        self,
        filters="all",
        property_filters_flag=True,
        HBA=(0, 10),
        HBD=(0, 5),
        LogP=(-5, 5),
        MW=(0, 500),
        Rot=(0, 10),
        TPSA=(0, 200),
    ):
        # Validate the selection first so that a bad name fails immediately,
        # before any import or installation is attempted.
        filters = self._resolve_filters(filters)

        try:
            from rd_filters.rd_filters import RDFilters, read_rules
        except ImportError:
            # The package is missing: ask the project's ``install`` helper to
            # fetch it, then retry the import once.
            install("git+https://github.com/PatWalters/rd_filters.git")
            from rd_filters.rd_filters import RDFilters, read_rules
        import pkg_resources

        self.property_filters_flag = property_filters_flag

        # Data files bundled inside the rd_filters distribution.
        alert_file_name = pkg_resources.resource_filename(
            "rd_filters", "data/alert_collection.csv"
        )
        rules_file_path = pkg_resources.resource_filename(
            "rd_filters", "data/rules.json"
        )
        self.rf = RDFilters(alert_file_name)
        self.rule_dict = read_rules(rules_file_path)

        # Switch this rule set off explicitly, then enable what was requested.
        # NOTE(review): presumably the bundled rules.json enables Inpharmatica
        # by default -- confirm against the rd_filters data file.
        self.rule_dict["Rule_Inpharmatica"] = False
        for name in filters:
            self.rule_dict["Rule_" + name] = True

        if self.property_filters_flag:
            bounds = zip(self._PROPERTY_KEYS, (HBA, HBD, LogP, MW, Rot, TPSA))
            for key, value in bounds:
                # Store a copy so instances never share (or mutate) the
                # caller's sequence or the signature defaults.
                self.rule_dict[key] = list(value)
        else:
            for key in self._PROPERTY_KEYS:
                self.rule_dict.pop(key, None)

        print_sys("MolFilter is using the following filters:")
        for key, value in self.rule_dict.items():
            # Disabled rule sets are not echoed; every other entry is.
            if not key.startswith("Rule") or value:
                print_sys(key + ": " + str(value))

        active_rules = [
            key.replace("Rule_", "")
            for key, enabled in self.rule_dict.items()
            if key.startswith("Rule") and enabled
        ]
        self.rf.build_rule_list(active_rules)

    @classmethod
    def _resolve_filters(cls, filters):
        """Turn the ``filters`` argument into a validated list of rule set names.

        Args:
            filters: ``"all"``, a single name, or an iterable of names.

        Returns:
            list of rule set names.

        Raises:
            ValueError: if a name is not one of the available rule sets.
        """
        if isinstance(filters, str):
            if filters == "all":
                return list(cls._ALL_FILTERS)
            filters = [filters]
        else:
            # Accept any iterable (list, tuple, set, ...), not only lists.
            filters = list(filters)

        for name in filters:
            if name not in cls._ALL_FILTERS:
                raise ValueError(
                    str(name)
                    + " not found; Please choose from a list of available filters from 'BMS', 'Dundee', 'Glaxo', 'Inpharmatica', 'LINT', 'MLSMR', 'PAINS', 'SureChEMBL'"
                )
        return filters

    def __call__(self, input_data):
        """Return the SMILES strings from ``input_data`` that pass the filter.

        Args:
            input_data: one SMILES string, or a list / numpy array of them.

        Returns:
            numpy array of the SMILES whose FILTER field is "OK" and, when
            ``property_filters_flag`` is set, whose six properties all lie
            within the configured bounds (bounds are inclusive).

        Raises:
            ValueError: if ``input_data`` is neither a string, a list nor a
                numpy array.
        """
        import multiprocessing as mp
        import pandas as pd

        if isinstance(input_data, str):
            input_data = [input_data]
        elif not isinstance(input_data, (list, np.ndarray, np.generic)):
            raise ValueError(
                "Input must be a list/numpy array of SMILES or one SMILES string!"
            )

        # ``rf.evaluate`` takes (SMILES, name) pairs; the position in the
        # input serves as the name.
        pairs = [(smiles, idx) for idx, smiles in enumerate(input_data)]
        if not pairs:
            return np.array([], dtype=object)

        # Never start more workers than there are molecules, and skip the
        # pool entirely when a single molecule is evaluated.
        n_workers = min(int(mp.cpu_count()), len(pairs))
        if n_workers > 1:
            # The context manager shuts the workers down on exit; without it
            # every call would leave a pool of processes behind.
            with mp.Pool(n_workers) as pool:
                res = pool.map(self.rf.evaluate, pairs)
        else:
            res = [self.rf.evaluate(pair) for pair in pairs]

        df = pd.DataFrame(res, columns=list(self._RESULT_COLUMNS))
        keep = df.FILTER == "OK"
        if self.property_filters_flag:
            for column in ("MW", "LogP", "HBD", "HBA", "TPSA", "Rot"):
                keep = keep & df[column].between(*self.rule_dict[column])
        return df[keep].SMILES.values