"""Utility functions for querying PubChem and UniProt.
"""
import json
import os, sys
try:
from urllib.error import HTTPError
from urllib.parse import quote, urlencode
from urllib.request import urlopen
except ImportError:
from urllib import urlencode
from urllib2 import quote, urlopen, HTTPError
def _parse_prop(search, proplist):
"""Extract property value from record using the given urn search filter.
"""
props = [i for i in proplist if all(item in i['urn'].items() for item in search.items())]
if len(props) > 0:
return props[0]['value'][list(props[0]['value'].keys())[0]]
def request(identifier, namespace='cid', domain='compound', operation=None, output='JSON', searchtype=None):
    """Construct a PubChem PUG REST request from parameters and return the response.

    Copied from
    https://github.com/mcs07/PubChemPy/blob/e3c4f4a9b6120433e5cc3383464c7a79e9b2b86e/pubchempy.py#L238
    Full specification at http://pubchem.ncbi.nlm.nih.gov/pug_rest/PUG_REST.html

    Args:
        identifier: an int, str, bytes, or an iterable of values. An iterable
            is joined into a single comma-separated string.
        namespace (str): namespace URL component; also used as the POST field
            name when the identifier is sent in the request body.
        domain (str): domain URL component.
        operation (str): optional operation URL component.
        output (str): output-format URL component.
        searchtype (str): optional search-type URL component.

    Returns:
        The open response object returned by ``urlopen``. It is not read or
        closed here; that is left to the caller.

    Raises:
        ValueError: if ``identifier`` is falsy (``None``, ``0`` or empty).
        Exceptions raised by ``urlopen`` are not caught and propagate.
    """
    api_base = 'https://pubchem.ncbi.nlm.nih.gov/rest/pug'
    text_types = (str, bytes)
    if not identifier:
        raise ValueError('identifier/cid cannot be None')
    # Normalise to one text value: ints are stringified, and any other
    # non-text value is treated as an iterable and joined with commas.
    if isinstance(identifier, int):
        identifier = str(identifier)
    if not isinstance(identifier, text_types):
        identifier = ','.join(str(x) for x in identifier)
    # bytes identifiers are accepted above, so every later step must cope
    # with them (str methods with str arguments fail on bytes in Python 3).
    is_bytes = isinstance(identifier, bytes)
    # Build API URL
    urlid, postdata = None, None
    if namespace == 'sourceid':
        # quote() leaves '/' unescaped by default, so it would otherwise end
        # up as an extra path separator in the URL.
        if is_bytes:
            identifier = identifier.replace(b'/', b'.')
        else:
            identifier = identifier.replace('/', '.')
    identifier_in_url = (
        namespace in ('listkey', 'formula', 'sourceid')
        or searchtype == 'xref'
        or (searchtype and namespace == 'cid')
        or domain == 'sources'
    )
    if identifier_in_url:
        # quote() accepts bytes directly; only text needs encoding first.
        urlid = quote(identifier if is_bytes else identifier.encode('utf8'))
    else:
        # Otherwise the identifier travels in the POST body, not the URL.
        postdata = urlencode([(namespace, identifier)]).encode('utf8')
    # filter(None, ...) drops the components that were left unset.
    comps = filter(None, [api_base, domain, searchtype, namespace, urlid, operation, output])
    apiurl = '/'.join(comps)
    # Make request
    return urlopen(apiurl, postdata)
def uniprot2seq(ProteinID):
    """Get protein sequence from Uniprot ID

    Downloads ``<ProteinID>.fasta`` from uniprot.org, drops the first line
    and joins the remaining lines into a single string.

    Args:
        ProteinID (str): the uniprot ID

    Returns:
        str: amino acid sequence

    Exceptions raised by ``urlopen`` (for example for an unknown ID) are not
    caught here.
    """
    from contextlib import closing

    url = 'http://www.uniprot.org/uniprot/' + str(ProteinID) + '.fasta'
    # closing() guarantees the connection is released even if reading fails.
    with closing(urlopen(url)) as response:
        lines = response.readlines()
    # The first line of a FASTA record is the header, not sequence data.
    return ''.join(line.strip().decode("utf-8") for line in lines[1:])
def cid2smiles(cid):
    """SMILES string from PubChem CID

    Fetches the compound record through ``request`` and extracts the property
    whose URN has label ``'SMILES'`` and name ``'Canonical'``.

    Args:
        cid (str): PubChem CID

    Returns:
        str: SMILES string. ``'NULL'`` is returned (and a message printed)
        when the lookup or parsing fails. ``None`` is returned when the record
        is fetched but contains no matching property.
    """
    try:
        response = request(cid)
        try:
            record = json.loads(response.read().decode())
        finally:
            # Release the connection whether or not the payload parsed.
            response.close()
        smiles = _parse_prop({'label': 'SMILES', 'name': 'Canonical'},
                             record['PC_Compounds'][0]['props'])
    except Exception:
        # Deliberate best-effort fallback. Exception (not a bare except) lets
        # KeyboardInterrupt and SystemExit propagate.
        print('cid ' + str(cid) + ' failed, use NULL string')
        smiles = 'NULL'
    return smiles