# -*- coding: utf-8 -*-
# Author: TDC Team
# License: MIT
import numpy as np
from typing import List
try:
from rdkit import Chem, DataStructs
from rdkit.Chem import AllChem
from rdkit import rdBase
rdBase.DisableLog('rdApp.error')
from rdkit.Chem.Fingerprints import FingerprintMols
from rdkit.Chem import MACCSkeys
except:
raise ImportError("Please install rdkit by 'conda install -c conda-forge rdkit'! ")
from ...utils import print_sys
from ..oracle.oracle import smiles_to_rdkit_mol, smiles_2_fingerprint_ECFP4, smiles_2_fingerprint_FCFP4, smiles_2_fingerprint_AP, smiles_2_fingerprint_ECFP6
from ._smiles2pubchem import smiles2pubchem
def canonicalize(smiles):
    """Return the RDKit canonical (isomeric) SMILES for ``smiles``.

    Args:
        smiles: str, a SMILES string.

    Returns:
        str, the canonical SMILES, or None when RDKit cannot parse the input.
    """
    parsed = Chem.MolFromSmiles(smiles)
    if parsed is None:
        return None
    return Chem.MolToSmiles(parsed, isomericSmiles=True)
def smiles2morgan(s, radius=2, nBits=1024):
    """Convert smiles into Morgan Fingerprint.

    Args:
        s: str, a SMILES string
        radius: int (default: 2)
        nBits: int (default: 1024)

    Returns:
        fp: numpy.array of length ``nBits``; all zeros when the SMILES
            cannot be processed.
    """
    try:
        mol = Chem.MolFromSmiles(canonicalize(s))
        features_vec = AllChem.GetMorganFingerprintAsBitVect(mol, radius, nBits=nBits)
        # ConvertToNumpyArray resizes the destination to the bit-vector length.
        features = np.zeros((1,))
        DataStructs.ConvertToNumpyArray(features_vec, features)
    except Exception:
        # Report the caller's input: the canonical form is None for invalid
        # SMILES, and concatenating None would turn this fallback into a
        # TypeError instead of returning the zero vector.
        print_sys('rdkit not found this smiles for morgan: ' + str(s) + ' convert to all 0 features')
        features = np.zeros((nBits, ))
    return features
def smiles2rdkit2d(s):
    """Convert smiles into 200-dim Normalized RDKit 2D vector.

    Args:
        s: str, a SMILES string

    Returns:
        fp: numpy.array; NaN entries are replaced by 0, and a 200-dim zero
            vector is returned when the descriptors cannot be computed.

    Raises:
        ImportError: if descriptastorus is not installed.
    """
    canonical = canonicalize(s)
    try:
        from descriptastorus.descriptors import rdDescriptors, rdNormalizedDescriptors
    except ImportError:
        raise ImportError("Please install pip install git+https://github.com/bp-kelley/descriptastorus and pip install pandas-flavor")
    try:
        generator = rdNormalizedDescriptors.RDKit2DNormalized()
        # The first element of the generator output is dropped; the rest is the feature vector.
        features = np.array(generator.process(canonical)[1:])
        features[np.isnan(features)] = 0
    except Exception:
        # str() keeps the fallback alive when the SMILES is invalid
        # (canonical form is None) instead of failing on str + None.
        print_sys('descriptastorus not found this smiles: ' + str(s) + ' convert to all 0 features')
        features = np.zeros((200, ))
    return np.array(features)
def smiles2daylight(s):
    """Convert smiles into 2048-dim Daylight feature.

    Args:
        s: str, a SMILES string

    Returns:
        fp: numpy.array of length 2048 with 1 at every on-bit; all zeros
            when the SMILES cannot be processed.
    """
    NumFinger = 2048
    try:
        mol = Chem.MolFromSmiles(canonicalize(s))
        bv = FingerprintMols.FingerprintMol(mol)
        # An explicit integer dtype keeps an empty on-bit list a valid index
        # array (a bare np.array(()) would be float and be rejected).
        on_bits = np.array(tuple(bv.GetOnBits()), dtype=int)
        features = np.zeros((NumFinger, ))
        features[on_bits] = 1
    except Exception:
        # str() avoids str + None when canonicalisation failed.
        print_sys('rdkit not found this smiles: ' + str(s) + ' convert to all 0 features')
        features = np.zeros((NumFinger, ))
    return np.array(features)
def smiles2maccs(s):
    """Convert smiles into maccs feature.

    Args:
        s: str, a SMILES string

    Returns:
        fp: numpy.array (float64) holding the MACCS key bits.
    """
    molecule = Chem.MolFromSmiles(canonicalize(s))
    keys = MACCSkeys.GenMACCSKeys(molecule)
    # Start empty; ConvertToNumpyArray fills the array from the bit vector.
    out = np.zeros((0,), dtype=np.float64)
    DataStructs.ConvertToNumpyArray(keys, out)
    return out
'''
ECFP2 ---- 1
ECFP4 ---- 2
ECFP6 ---- 3
xxxxxxxxx ------ https://github.com/rdkit/benchmarking_platform/blob/master/scoring/fingerprint_lib.py
'''
def smiles2ECFP2(smiles):
    """Convert smiles into ECFP2 Morgan Fingerprint (radius 1, 2048 bits).

    Args:
        smiles: str, a SMILES string

    Returns:
        fp: numpy.array (float64) filled from the Morgan bit vector.
    """
    mol = smiles_to_rdkit_mol(canonicalize(smiles))
    bit_vector = AllChem.GetMorganFingerprintAsBitVect(mol, 1, nBits=2048)
    out = np.zeros((0,), dtype=np.float64)
    DataStructs.ConvertToNumpyArray(bit_vector, out)
    return out
def smiles2ECFP4(smiles):
    """Convert smiles into ECFP4 Morgan Fingerprint (radius 2, 2048 bits).

    Args:
        smiles: str, a SMILES string

    Returns:
        fp: numpy.array (float64) filled from the Morgan bit vector.
    """
    mol = smiles_to_rdkit_mol(canonicalize(smiles))
    bit_vector = AllChem.GetMorganFingerprintAsBitVect(mol, 2, nBits=2048)
    out = np.zeros((0,), dtype=np.float64)
    DataStructs.ConvertToNumpyArray(bit_vector, out)
    return out
def smiles2ECFP6(smiles):
    """Convert smiles into ECFP6 Morgan Fingerprint (radius 3, 2048 bits).

    Args:
        smiles: str, a SMILES string

    Returns:
        fp: numpy.array (float64) filled from the Morgan bit vector.

    refer: https://github.com/rdkit/benchmarking_platform/blob/master/scoring/fingerprint_lib.py
    """
    mol = smiles_to_rdkit_mol(canonicalize(smiles))
    bit_vector = AllChem.GetMorganFingerprintAsBitVect(mol, 3, nBits=2048)
    out = np.zeros((0,), dtype=np.float64)
    DataStructs.ConvertToNumpyArray(bit_vector, out)
    return out
# def smiles2smart(smiles):
class MoleculeFingerprint:
    r"""Callable that converts SMILES strings into a chosen fingerprint.

    Example:
        MolFP = MoleculeFingerprint(fp = 'ECFP6')
        out = MolFP('Clc1ccccc1C2C(=C(/N/C(=C2/C(=O)OCC)COCCN)C)\C(=O)OC')
        # np.array([1, 0, 1, .....])
        out = MolFP(['Clc1ccccc1C2C(=C(/N/C(=C2/C(=O)OCC)COCCN)C)\C(=O)OC',
                     'CCCOc1cc2ncnc(Nc3ccc4ncsc4c3)c2cc1S(=O)(=O)C(C)(C)C'])
        # np.array([[1, 0, 1, .....],
        #           [0, 0, 1, .....]])

    Supporting FPs:
        ECFP2, ECFP4, ECFP6, MACCS, Daylight, RDKit2D, Morgan, PubChem
    """

    def __init__(self, fp='ECFP4'):
        """Select the fingerprint function.

        Args:
            fp: str, one of the supported fingerprint names.

        Raises:
            Exception: if ``fp`` is not a supported fingerprint name.
        """
        fp2func = {'ECFP2': smiles2ECFP2,
                   'ECFP4': smiles2ECFP4,
                   'ECFP6': smiles2ECFP6,
                   'MACCS': smiles2maccs,
                   'Daylight': smiles2daylight,
                   'RDKit2D': smiles2rdkit2d,
                   'Morgan': smiles2morgan,
                   'PubChem': smiles2pubchem}
        # Explicit check instead of `assert`, which is stripped under -O.
        if fp not in fp2func:
            raise Exception("The fingerprint you specify are not supported. "
                            "It can only among 'ECFP2', 'ECFP4', 'ECFP6', 'MACCS', 'Daylight', 'RDKit2D', 'Morgan', 'PubChem'")
        self.fp = fp
        self.func = fp2func[fp]

    def __call__(self, x):
        """Featurize one SMILES string or a collection of them.

        Args:
            x: str, or list / tuple / numpy.ndarray of str.

        Returns:
            numpy.array: a single fingerprint for a str input, or the
            fingerprints stacked row-wise for a collection.

        Raises:
            TypeError: if ``x`` is none of the supported input types.
        """
        if isinstance(x, np.ndarray):
            x = x.tolist()
        if isinstance(x, str):
            return self.func(x)
        if isinstance(x, (list, tuple)):
            return np.vstack([self.func(smiles) for smiles in x])
        raise TypeError("MoleculeFingerprint expects a SMILES string or a list of SMILES strings, got "
                        + type(x).__name__)
def smiles2selfies(smiles):
    """Convert smiles into selfies.

    Args:
        smiles: str, a SMILES string

    Returns:
        selfies: str, a SELFIES string.

    Raises:
        ImportError: if the selfies package is not installed.
    """
    # Imported here so the function does not depend on a module-level `sf`
    # that only exists after a MolConvert involving SELFIES was constructed.
    try:
        import selfies as sf
    except ImportError:
        raise ImportError("Please install selfies via 'pip install selfies'")
    return sf.encoder(canonicalize(smiles))
def selfies2smiles(selfies):
    """Convert selfies into smiles.

    Args:
        selfies: str, a SELFIES string.

    Returns:
        smiles: str, the canonical SMILES string (None if RDKit cannot
            parse the decoded SMILES).

    Raises:
        ImportError: if the selfies package is not installed.
    """
    # Imported here so the function does not depend on a module-level `sf`
    # that only exists after a MolConvert involving SELFIES was constructed.
    try:
        import selfies as sf
    except ImportError:
        raise ImportError("Please install selfies via 'pip install selfies'")
    return canonicalize(sf.decoder(selfies))
def smiles2mol(smiles):
    """Convert SMILES string into a kekulized rdkit.Chem.rdchem.Mol.

    Args:
        smiles: str, a SMILES string.

    Returns:
        mol: rdkit.Chem.rdchem.Mol, or None when RDKit returns no molecule.
    """
    molecule = Chem.MolFromSmiles(canonicalize(smiles))
    if molecule is not None:
        Chem.Kekulize(molecule)
    return molecule
def bondtype2idx(bond_type):
    """Map an RDKit bond type to an integer code.

    SINGLE -> 1, DOUBLE -> 2, TRIPLE -> 3, AROMATIC -> 4; any other bond
    type yields None.
    """
    kinds = Chem.rdchem.BondType
    codes = (
        (kinds.SINGLE, 1),
        (kinds.DOUBLE, 2),
        (kinds.TRIPLE, 3),
        (kinds.AROMATIC, 4),
    )
    for kind, code in codes:
        if bond_type == kind:
            return code
    return None
def smiles2graph2D(smiles):
    """Convert SMILES string into two-dimensional molecular graph feature.

    Args:
        smiles: str, a SMILES string

    Returns:
        idx2atom: dict, map from index to atom's symbol, e.g., {0:'C', 1:'N', ...}
        adj_matrix: np.array (int), symmetric; entry (i, j) is the bond code
            from ``bondtype2idx`` for bonded atoms and 0 otherwise.
    """
    mol = smiles2mol(canonicalize(smiles))
    size = mol.GetNumAtoms()
    idx2atom = {atom.GetIdx(): atom.GetSymbol() for atom in mol.GetAtoms()}
    adj_matrix = np.zeros((size, size), dtype=int)
    for bond in mol.GetBonds():
        i = bond.GetBeginAtom().GetIdx()
        j = bond.GetEndAtom().GetIdx()
        # Undirected graph: write the code on both sides of the diagonal.
        adj_matrix[i, j] = adj_matrix[j, i] = bondtype2idx(bond.GetBondType())
    return idx2atom, adj_matrix
def get_mol(smiles):
    """Parse ``smiles`` with RDKit and kekulize the result.

    Returns:
        rdkit.Chem.rdchem.Mol, or None when the SMILES cannot be parsed.
    """
    parsed = Chem.MolFromSmiles(smiles)
    if parsed is not None:
        Chem.Kekulize(parsed)
    return parsed
############### PyG begin ###############
# Element vocabulary for atom one-hot encoding; the last entry is the
# catch-all bucket for symbols that are not listed.
ELEM_LIST = [
    'C', 'N', 'O', 'S', 'F', 'Si', 'P', 'Cl', 'Br', 'Mg', 'Na', 'Ca',
    'Fe', 'Al', 'I', 'B', 'K', 'Se', 'Zn', 'H', 'Cu', 'Mn', 'unknown',
]
# Symbol one-hot + degree (6) + formal charge (5) + chiral tag (4) + aromatic flag (1).
ATOM_FDIM = len(ELEM_LIST) + 6 + 5 + 4 + 1
# NOTE(review): BOND_FDIM and MAX_NB are not used in the visible code;
# presumably kept for parity with the DeepPurpose chemutils they come from.
BOND_FDIM = 5 + 6
MAX_NB = 6
# https://github.com/kexinhuang12345/DeepPurpose/blob/master/DeepPurpose/chemutils.py
def onek_encoding_unk(x, allowable_set):
    """One-hot encode ``x`` against ``allowable_set``.

    Values not present in ``allowable_set`` are encoded as its last element.

    Returns:
        list of bool, one entry per element of ``allowable_set``.
    """
    target = x if x in allowable_set else allowable_set[-1]
    return [target == candidate for candidate in allowable_set]
def get_atom_features(atom):
    """Build the feature vector of an RDKit atom.

    The vector concatenates one-hot encodings of the element symbol, degree,
    formal charge and chiral tag, followed by the aromaticity flag.

    Args:
        atom: an RDKit atom.

    Returns:
        torch.Tensor of length ``ATOM_FDIM``.
    """
    # Imported locally so the function works without a module-level `torch`,
    # which otherwise only exists after MolConvert(dst='PyG') was constructed.
    import torch

    features = (onek_encoding_unk(atom.GetSymbol(), ELEM_LIST)
                + onek_encoding_unk(atom.GetDegree(), [0, 1, 2, 3, 4, 5])
                + onek_encoding_unk(atom.GetFormalCharge(), [-1, -2, 1, 2, 0])
                + onek_encoding_unk(int(atom.GetChiralTag()), [0, 1, 2, 3])
                + [atom.GetIsAromatic()])
    return torch.Tensor(features)
def smiles2PyG(smiles):
    """Convert SMILES string into torch_geometric.data.Data.

    Args:
        smiles: str, a SMILES string

    Returns:
        data: torch_geometric.data.Data with ``x`` (stacked atom features)
            and ``edge_index`` of shape (2, 2 * number of bonds); every bond
            contributes both directions.

    Raises:
        Exception: if PyTorch Geometric is not installed.
    """
    # Imported locally so the function does not depend on module-level names
    # that only exist after MolConvert(dst='PyG') was constructed.
    try:
        import torch
        from torch_geometric.data import Data
    except ImportError as err:
        raise Exception("Please install PyTorch Geometric via 'https://pytorch-geometric.readthedocs.io/en/latest/notes/installation.html'.") from err

    mol = Chem.MolFromSmiles(canonicalize(smiles))
    atom_features = torch.stack([get_atom_features(atom) for atom in mol.GetAtoms()])

    edges = []
    for bond in mol.GetBonds():
        begin = bond.GetBeginAtom().GetIdx()
        end = bond.GetEndAtom().GetIdx()
        edges.extend([[begin, end], [end, begin]])
    # reshape(-1, 2) keeps the (2, E) layout even for a molecule without
    # bonds, where a bare LongTensor([]) would be 1-D.
    edge_index = torch.LongTensor(edges).reshape(-1, 2).t().contiguous()
    return Data(x=atom_features, edge_index=edge_index)
def molfile2PyG(molfile):
    """Convert a mol file into torch_geometric.data.Data via its SMILES.

    Args:
        molfile: str, path of a mol file.

    Returns:
        data: the result of ``smiles2PyG`` on the canonical SMILES.
    """
    canonical = canonicalize(molfile2smiles(molfile))
    return smiles2PyG(canonical)
############### PyG end ###############
############### DGL begin ###############
def smiles2DGL(smiles):
    """Convert SMILES string into dgl.DGLGraph.

    Args:
        smiles: str, a SMILES string

    Returns:
        g: dgl.DGLGraph with one node per atom and two directed edges per bond.

    Raises:
        Exception: if DGL is not installed.
    """
    # Imported locally so the function does not depend on a module-level
    # `dgl` that only exists after MolConvert(dst='DGL') was constructed.
    try:
        import dgl
    except ImportError as err:
        raise Exception("Please install DGL via 'pip install dgl'.") from err

    mol = Chem.MolFromSmiles(canonicalize(smiles))
    edges = []
    for bond in mol.GetBonds():
        begin = bond.GetBeginAtom().GetIdx()
        end = bond.GetEndAtom().GetIdx()
        edges.extend([[begin, end], [end, begin]])

    g = dgl.DGLGraph()
    g.add_nodes(mol.GetNumAtoms())
    # A molecule without bonds (e.g. a single atom) has nothing to unzip;
    # unpacking zip(*[]) would raise ValueError.
    if edges:
        src, dst = tuple(zip(*edges))
        g.add_edges(src, dst)
    return g
############### DGL end ###############
from ._xyz2mol import xyzfile2mol
def mol2smiles(mol):
    """Convert an RDKit molecule into a canonical SMILES string.

    Args:
        mol: rdkit.Chem.rdchem.Mol

    Returns:
        smiles: str, the canonicalized SMILES.
    """
    return canonicalize(Chem.MolToSmiles(mol))
def xyzfile2smiles(xyzfile):
    """Convert xyzfile into smiles string.

    Args:
        xyzfile: str, file

    Returns:
        smiles: str, a SMILES string
    """
    # xyzfile2mol returns a pair; only the molecule is needed here.
    molecule, _ = xyzfile2mol(xyzfile)
    return canonicalize(mol2smiles(molecule))
def xyzfile2selfies(xyzfile):
    """Convert xyzfile into SELFIES string.

    Args:
        xyzfile: str, file

    Returns:
        selfies: str, a SELFIES string.
    """
    canonical = canonicalize(xyzfile2smiles(xyzfile))
    return smiles2selfies(canonical)
def distance3d(coordinate_1, coordinate_2):
    """Return the Euclidean distance between two coordinate sequences.

    Components are paired with ``zip``, so extra components of the longer
    sequence are ignored.
    """
    squared_diffs = [(a - b) ** 2 for a, b in zip(coordinate_1, coordinate_2)]
    total = sum(squared_diffs)
    return np.sqrt(total)
def upper_atom(atomsymbol):
    """Upper-case the first character of ``atomsymbol``, leaving the rest unchanged."""
    first, rest = atomsymbol[0], atomsymbol[1:]
    return first.upper() + rest
def xyzfile2graph3d(xyzfile):
    """Convert an xyz file into a 3D graph representation.

    Args:
        xyzfile: str, path of an xyz file.

    Returns:
        idx2atom: dict, map from atom index to atom symbol.
        distance_adj_matrix: np.array, symmetric matrix of pairwise
            Euclidean distances between atom coordinates (zero diagonal).
        BO: second value returned by ``xyzfile2mol`` (presumably the
            bond-order matrix -- TODO confirm).
    """
    # NOTE(review): read_xyz_file and str_atom are not imported anywhere in
    # the visible module; they are assumed to live next to xyzfile2mol in
    # ._xyz2mol -- confirm.
    from ._xyz2mol import read_xyz_file, str_atom

    # The original passed the undefined name `file` here.
    atoms, _charge, xyz_coordinates = read_xyz_file(xyzfile)
    num_atoms = len(atoms)
    distance_adj_matrix = np.zeros((num_atoms, num_atoms))
    for i in range(num_atoms):
        for j in range(i + 1, num_atoms):
            distance = distance3d(xyz_coordinates[i], xyz_coordinates[j])
            distance_adj_matrix[i, j] = distance_adj_matrix[j, i] = distance
    idx2atom = {idx: upper_atom(str_atom(atom)) for idx, atom in enumerate(atoms)}
    _, BO = xyzfile2mol(xyzfile)
    return idx2atom, distance_adj_matrix, BO
############## end xyz2mol ################
def sdffile2smiles_lst(sdffile):
    """Convert SDF file into a list of SMILES string.

    Args:
        sdffile: str, file

    Returns:
        smiles_lst: a list of SMILES strings.
    """
    from rdkit.Chem.PandasTools import LoadSDF

    # LoadSDF writes the SMILES of every record into the 'SMILES' column.
    frame = LoadSDF(sdffile, smilesName='SMILES')
    return frame['SMILES'].to_list()
def sdffile2graph3d_lst(sdffile):
    """Convert SDF file into a list of 3D graph.

    Args:
        sdffile: SDF file

    Returns:
        graph3d_lst: a list of 3D graph.
            each graph has (i) idx2atom (dict); (ii) distance_adj_matrix (np.array); (iii) bondtype_adj_matrix (np.array)
    """
    # NOTE(review): sdffile2mol_conformer and mol_conformer2graph3d are not
    # defined in the visible part of this module -- confirm where they live.
    conformers = sdffile2mol_conformer(sdffile)
    return mol_conformer2graph3d(conformers)
def sdffile2selfies_lst(sdf):
    """Convert sdffile into a list of SELFIES strings.

    Args:
        sdf: str, SDF file

    Returns:
        selfies_lst: a list of SELFIES strings.
    """
    return list(map(smiles2selfies, sdffile2smiles_lst(sdf)))
def smiles_lst2coulomb(smiles_lst):
    """Convert a list of SMILES strings into coulomb format.

    Args:
        smiles_lst: a list of SMILES strings.

    Returns:
        features: np.array of shape (nmol, max_atom_n**2), where max_atom_n
            is the maximal number of atoms in smiles_lst;
            features[i].reshape(max_atom_n, max_atom_n)[:3,:3] is the
            3*3 Coulomb matrix.

    Raises:
        Exception: if chemml is not installed.
    """
    # Imported locally so the function does not depend on module-level names
    # that only exist after MolConvert(dst='Coulumb') was constructed.
    try:
        from chemml.chem import CoulombMatrix, Molecule
    except ImportError as err:
        raise Exception("Please install chemml via 'pip install pybel' and 'pip install chemml'. ") from err

    molecules = [Molecule(smiles, 'smiles') for smiles in smiles_lst]
    for mol in molecules:
        mol.to_xyz(optimizer='UFF')
    cm = CoulombMatrix(cm_type='UM', n_jobs=-1)
    features = cm.represent(molecules)
    return features.to_numpy()
def sdffile2coulomb(sdf):
    """Convert sdffile into coulomb features.

    Args:
        sdf: str, SDF file

    Returns:
        coulomb feature: np.array
    """
    return smiles_lst2coulomb(sdffile2smiles_lst(sdf))
def xyzfile2coulomb(xyzfile):
    """Convert an xyz file into coulomb features.

    Args:
        xyzfile: str, file

    Returns:
        coulomb feature: np.array computed for the single molecule in the file.
    """
    canonical = canonicalize(xyzfile2smiles(xyzfile))
    return smiles_lst2coulomb([canonical])
#2D_format = ['SMILES', 'SELFIES', 'Graph2D', 'PyG', 'DGL', 'ECFP2', 'ECFP4', 'ECFP6', 'MACCS', 'Daylight', 'RDKit2D', 'Morgan', 'PubChem']
#3D_format = ['Graph3D', 'Coulumb']
## XXX2smiles
def molfile2smiles(molfile):
    """Convert molfile into SMILES string.

    Args:
        molfile: str, a file.

    Returns:
        smiles: str, the canonical SMILES string
    """
    molecule = Chem.MolFromMolFile(molfile)
    return canonicalize(Chem.MolToSmiles(molecule))
def mol2file2smiles(molfile):
    """Convert mol2file into SMILES string.

    Args:
        molfile: str, a mol2 file.

    Returns:
        smiles: str, the canonical SMILES string
    """
    molecule = Chem.MolFromMol2File(molfile)
    return canonicalize(Chem.MolToSmiles(molecule))
## smiles2xxx
atom_types = ['C', 'N', 'O', 'H', 'F', 'unknown'] ### Cl, S?


def atom2onehot(atom):
    """Convert atom to one-hot feature vector.

    Args:
        atom: str, an atom symbol such as 'C'.

    Returns:
        np.array of shape (1, len(atom_types)), e.g. [[1, 0, 0, 0, 0, 0]].
        Symbols that are not in ``atom_types`` are mapped to the 'unknown'
        slot instead of raising ValueError.
    """
    onehot = np.zeros((1, len(atom_types)))
    try:
        idx = atom_types.index(atom)
    except ValueError:
        # Route unlisted symbols to the dedicated 'unknown' entry.
        idx = atom_types.index('unknown')
    onehot[0, idx] = 1
    return onehot
def atomstring2atomfeature(atom_string_list):
    """Stack the one-hot encodings of a list of atom symbols.

    Args:
        atom_string_list: list of str, atom symbols.

    Returns:
        np.array with one row (from ``atom2onehot``) per atom.
    """
    rows = list(map(atom2onehot, atom_string_list))
    return np.concatenate(rows, 0)
def raw3D2pyg(raw3d_feature):
    """Convert raw3d feature to pyg (torch-geometric) feature.

    Args:
        raw3d_feature: (atom_string_list, positions)
            - atom_string_list: list, each element is an atom, length is N
            - positions: np.array, shape: (N,3)

    Returns:
        data = Data(x=x, pos=pos); no label ``y`` is attached.
    """
    import torch
    from torch_geometric.data import Data  ### global

    atom_string_list, positions = raw3d_feature
    node_features = torch.from_numpy(atomstring2atomfeature(atom_string_list))
    coordinates = torch.from_numpy(positions)
    return Data(x=node_features, pos=coordinates)
# Fingerprint destinations; MolConvert returns these as a numpy array.
fingerprints_list = ['ECFP2', 'ECFP4', 'ECFP6', 'MACCS', 'Daylight', 'RDKit2D', 'Morgan', 'PubChem']

# Supported conversions: source format -> list of reachable destination formats.
# Each value is a fresh list (`+` copies), so no two entries share storage.
convert_dict = {
    'SMILES': ['SELFIES', 'Graph2D', 'PyG', 'DGL'] + fingerprints_list,
    'SELFIES': ['SMILES', 'Graph2D', 'PyG', 'DGL'] + fingerprints_list,
    'mol': ['SMILES', 'SELFIES', 'Graph2D', 'PyG', 'DGL'] + fingerprints_list,
    'mol2': ['SMILES', 'SELFIES', 'Graph2D', 'PyG', 'DGL'] + fingerprints_list,
    'SDF': ['SMILES', 'SELFIES', 'Graph3D', 'Coulumb'],
    'XYZ': ['SMILES', 'SELFIES', 'Graph3D', 'Coulumb'],
    'Raw3D': ['PyG3D'],
}

# Formats grouped by dimensionality.
twoD_format = ['SMILES', 'SELFIES', 'mol', 'mol2']
threeD_format = ['SDF', 'XYZ', 'PyG3D', 'Raw3D', 'distance', 'Coulumb', 'shape']  ### shape:mesh
class MolConvert:
    r"""MolConvert: convert the molecule from src format to dst format.

    Example:
        convert = MolConvert(src = 'SMILES', dst = 'Graph2D')
        g = convert('Clc1ccccc1C2C(=C(/N/C(=C2/C(=O)OCC)COCCN)C)\C(=O)OC')
        # g: graph with edge, node features
        g = convert(['Clc1ccccc1C2C(=C(/N/C(=C2/C(=O)OCC)COCCN)C)\C(=O)OC',
                     'CCCOc1cc2ncnc(Nc3ccc4ncsc4c3)c2cc1S(=O)(=O)C(C)(C)C'])
        # g: a list of graphs with edge, node features

    if src is 2D, dst can be only 2D output
    if src is 3D, dst can be both 2D and 3D outputs

    src: 2D - [SMILES, SELFIES]
         3D - [SDF file, XYZ file]
    dst: 2D - [2D Graph (+ PyG, DGL format), Canonical SMILES, SELFIES, Fingerprints]
         3D - [3D graphs (adj matrix entry is (distance, bond type)), Coulumb Matrix]
    """

    def __init__(self, src='SMILES', dst='Graph2D', radius=2, nBits=1024):
        """Validate the conversion and build the conversion function.

        Args:
            src: str, source format; must be a key of ``convert_dict``.
            dst: str, destination format; must be listed in ``convert_dict[src]``.
            radius: int, Morgan fingerprint radius (used when dst == 'Morgan').
            nBits: int, Morgan fingerprint length (used when dst == 'Morgan').

        Raises:
            Exception: if the conversion is not supported, or an optional
                package needed for it (selfies, chemml, torch_geometric,
                dgl) is not installed.
        """
        # The optional packages are published as module globals because the
        # module-level conversion helpers look them up by these names.
        global sf, CoulombMatrix, Molecule, torch, Data, dgl

        self._src = src
        self._dst = dst
        self._radius = radius
        self._nbits = nBits
        self.convert_dict = convert_dict

        # Explicit checks instead of `assert`, which is stripped under -O.
        if src not in self.convert_dict:
            raise Exception("src format is not supported")
        if dst not in self.convert_dict[src]:
            raise Exception('It is not supported to convert src to dst.')

        if 'SELFIES' in (src, dst):
            try:
                import selfies as sf
            except ImportError as err:
                raise Exception("Please install selfies via 'pip install selfies'") from err

        if dst == 'Coulumb':
            try:
                from chemml.chem import CoulombMatrix, Molecule
            except ImportError as err:
                raise Exception("Please install chemml via 'pip install pybel' and 'pip install chemml'. ") from err

        if dst == 'PyG':
            try:
                import torch
                from torch_geometric.data import Data
            except ImportError as err:
                raise Exception("Please install PyTorch Geometric via 'https://pytorch-geometric.readthedocs.io/en/latest/notes/installation.html'.") from err

        if dst == 'DGL':
            try:
                import dgl
            except ImportError as err:
                raise Exception("Please install DGL via 'pip install dgl'.") from err

        if src in twoD_format:
            ### 1. src -> SMILES
            to_smiles = {
                'SMILES': canonicalize,
                'SELFIES': selfies2smiles,
                'mol': molfile2smiles,
                'mol2': mol2file2smiles,
            }
            ### 2. SMILES -> all
            from_smiles = {
                'SMILES': canonicalize,
                'SELFIES': smiles2selfies,
                'Graph2D': smiles2graph2D,
                'PyG': smiles2PyG,
                'DGL': smiles2DGL,
                'ECFP2': smiles2ECFP2,
                'ECFP4': smiles2ECFP4,
                'ECFP6': smiles2ECFP6,
                'MACCS': smiles2maccs,
                'Daylight': smiles2daylight,
                'RDKit2D': smiles2rdkit2d,
                'PubChem': smiles2pubchem,
            }
            f1 = to_smiles[src]
            if dst == 'Morgan':
                # The composed function is a lambda, so comparing it with
                # smiles2morgan at call time can never match; bind the
                # fingerprint parameters here so radius / nBits take effect.
                f2 = lambda smiles: smiles2morgan(smiles, radius=self._radius, nBits=self._nbits)
            else:
                f2 = from_smiles[dst]
            self.func = lambda x: f2(f1(x))
        elif src in threeD_format:
            # XYZ input is a filename containing a single molecule; SDF
            # converters return one result per record in the file.
            three_d = {
                ('XYZ', 'SMILES'): xyzfile2smiles,
                ('XYZ', 'SELFIES'): xyzfile2selfies,
                ('XYZ', 'Graph3D'): xyzfile2graph3d,
                ('XYZ', 'Coulumb'): xyzfile2coulomb,
                ('SDF', 'Graph3D'): sdffile2graph3d_lst,
                ('SDF', 'SMILES'): sdffile2smiles_lst,
                ('SDF', 'SELFIES'): sdffile2selfies_lst,
                ('SDF', 'Coulumb'): sdffile2coulomb,
                ('Raw3D', 'PyG3D'): raw3D2pyg,
            }
            self.func = three_d[(src, dst)]

    def __call__(self, x):
        """Convert one input or a list of inputs.

        Args:
            x: str (a single molecule / filename), or list / numpy.ndarray
                of inputs.

        Returns:
            The converted object for a str input; for a list, a list of
            converted objects, returned as a numpy array when the
            destination is a fingerprint.

        Raises:
            TypeError: if ``x`` is neither a str nor a list after ndarray
                conversion.
        """
        if isinstance(x, np.ndarray):
            x = x.tolist()
        if isinstance(x, str):
            return self.func(x)
        if isinstance(x, list):
            out = [self.func(item) for item in x]
            if self._dst in fingerprints_list:
                out = np.array(out)
            return out
        raise TypeError("MolConvert expects a str or a list of inputs, got " + type(x).__name__)