fatmacankara committed
Commit fa18295 · 1 Parent(s): c8c0720

Update code/add_structure.py

Files changed (1): code/add_structure.py (+349 -182)
code/add_structure.py CHANGED
@@ -1,3 +1,4 @@
+import ast
 import re
 import time
 import json
@@ -8,196 +9,361 @@ import requests
 import unipressed
 from requests.adapters import HTTPAdapter, Retry
 from unipressed import IdMappingClient
-"""
-## Code adapted from UniProt documentation.
-def get_pdb_ids_2(protein_id):
-    POLLING_INTERVAL = 5
-    API_URL = "https://rest.uniprot.org"
-
-    retries = Retry(total=5, backoff_factor=0.25, status_forcelist=[500, 502, 503, 504])
-    session = requests.Session()
-    session.mount("https://", HTTPAdapter(max_retries=retries))
-
-    def check_response(response):
-        try:
-            response.raise_for_status()
-        except requests.HTTPError:
-            print(response.json())
-            raise
-
-    def submit_id_mapping(from_db, to_db, ids):
-        request = requests.post(
-            f"{API_URL}/idmapping/run",
-            data={"from": from_db, "to": to_db, "ids": ids},
-        )
-        check_response(request)
-        if check_response != None:
-            return request.json()["jobId"]
-        else:
-            return None
-
-    def get_next_link(headers):
-        re_next_link = re.compile(r'<(.+)>; rel="next"')
-        if "Link" in headers:
-            match = re_next_link.match(headers["Link"])
-            if match:
-                return match.group(1)
-
-    def check_id_mapping_results_ready(job_id):
-        print('entered')
-        while True:
-            print('True')
-            print('HR-1')
-            try:
-                request = session.get(f"{API_URL}/idmapping/status/{job_id}")
-            except requests.exceptions.RetryError:
-                print('eneted')
-                request = None
-            print('HR0-22')
-            check_response(request)
-            j = request.json()
-            print('HR0')
-            try:
-                print('HR1')
-                if "jobStatus" in j:
-                    print('HR2')
-                    if j["jobStatus"] == "RUNNING":
-                        print(f"Retrying in {POLLING_INTERVAL}s")
-                        time.sleep(POLLING_INTERVAL)
-                    else:
-                        print('HR3')
-                        raise Exception(j["jobStatus"])
-            except:
-                print('HR4')
-                requests.exceptions.RetryError
-            else:
-                print('HR4')
-                return bool(j["results"] or j["failedIds"])
-
-    def get_batch(batch_response, file_format, compressed):
-        batch_url = get_next_link(batch_response.headers)
-        while batch_url:
-            batch_response = session.get(batch_url)
-            batch_response.raise_for_status()
-            yield decode_results(batch_response, file_format, compressed)
-            batch_url = get_next_link(batch_response.headers)
-
-    def combine_batches(all_results, batch_results, file_format):
-        if file_format == "json":
-            for key in ("results", "failedIds"):
-                if key in batch_results and batch_results[key]:
-                    all_results[key] += batch_results[key]
-        elif file_format == "tsv":
-            return all_results + batch_results[1:]
-        else:
-            return all_results + batch_results
-        return all_results
-
-    def get_id_mapping_results_link(job_id):
-        url = f"{API_URL}/idmapping/details/{job_id}"
-
-        request = session.get(url)
-        check_response(request)
-        return request.json()["redirectURL"]
-
-    def decode_results(response, file_format, compressed):
-        if compressed:
-            decompressed = zlib.decompress(response.content, 16 + zlib.MAX_WBITS)
-            if file_format == "json":
-                j = json.loads(decompressed.decode("utf-8"))
-                return j
-            elif file_format == "tsv":
-                return [line for line in decompressed.decode("utf-8").split("\n") if line]
-            elif file_format == "xlsx":
-                return [decompressed]
-            elif file_format == "xml":
-                return [decompressed.decode("utf-8")]
-            else:
-                return decompressed.decode("utf-8")
-        elif file_format == "json":
-            return response.json()
-        elif file_format == "tsv":
-            return [line for line in response.text.split("\n") if line]
-        elif file_format == "xlsx":
-            return [response.content]
-        elif file_format == "xml":
-            return [response.text]
-        return response.text
-
-    def get_xml_namespace(element):
-        m = re.match(r"\{(.*)\}", element.tag)
-        return m.groups()[0] if m else ""
-
-    def merge_xml_results(xml_results):
-        merged_root = ElementTree.fromstring(xml_results[0])
-        for result in xml_results[1:]:
-            root = ElementTree.fromstring(result)
-            for child in root.findall("{http://uniprot.org/uniprot}entry"):
-                merged_root.insert(-1, child)
-        ElementTree.register_namespace("", get_xml_namespace(merged_root[0]))
-        return ElementTree.tostring(merged_root, encoding="utf-8", xml_declaration=True)
-
-
-    def get_id_mapping_results_search(url):
-        parsed = urlparse(url)
-        query = parse_qs(parsed.query)
-        file_format = query["format"][0] if "format" in query else "json"
-        if "size" in query:
-            size = int(query["size"][0])
-        else:
-            size = 500
-        query["size"] = size
-        compressed = (
-            query["compressed"][0].lower() == "true" if "compressed" in query else False
-        )
-        parsed = parsed._replace(query=urlencode(query, doseq=True))
-        url = parsed.geturl()
-        request = session.get(url)
-        check_response(request)
-        results = decode_results(request, file_format, compressed)
-        total = int(request.headers["x-total-results"])
-        for i, batch in enumerate(get_batch(request, file_format, compressed), 1):
-            results = combine_batches(results, batch, file_format)
-        if file_format == "xml":
-            return merge_xml_results(results)
-        return results
-
-
-    job_id = submit_id_mapping(
-        from_db="UniProtKB_AC-ID", to_db="PDB", ids=protein_id
-    )
-    print('skhfkh')
-    print(submit_id_mapping(
-        from_db="UniProtKB_AC-ID", to_db="PDB", ids=protein_id
-    ))
-    print('nor', check_id_mapping_results_ready(job_id))
-    if check_id_mapping_results_ready(job_id):
-        link = get_id_mapping_results_link(job_id)
-        results = get_id_mapping_results_search(link)
-        return [i['to'] for i in results['results']]
-    else:
-        print('no i am here')
-        return None
+import Bio
+from Bio import SeqIO
+import pandas as pd
+import numpy as np
+from pathlib import Path
+from Bio.PDB import *
+from io import StringIO
+from utils import *
+
+import math
+
+import json
+UNIPROT_ANNOTATION_COLS = ['disulfide', 'intMet', 'intramembrane', 'naturalVariant', 'dnaBinding',
+                           'activeSite',
+                           'nucleotideBinding', 'lipidation', 'site', 'transmembrane',
+                           'crosslink', 'mutagenesis', 'strand',
+                           'helix', 'turn', 'metalBinding', 'repeat', 'topologicalDomain',
+                           'caBinding', 'bindingSite', 'region',
+                           'signalPeptide', 'modifiedResidue', 'zincFinger', 'motif',
+                           'coiledCoil', 'peptide',
+                           'transitPeptide', 'glycosylation', 'propeptide', 'disulfideBinary',
+                           'intMetBinary', 'intramembraneBinary',
+                           'naturalVariantBinary', 'dnaBindingBinary', 'activeSiteBinary',
+                           'nucleotideBindingBinary', 'lipidationBinary', 'siteBinary',
+                           'transmembraneBinary', 'crosslinkBinary', 'mutagenesisBinary',
+                           'strandBinary', 'helixBinary', 'turnBinary', 'metalBindingBinary',
+                           'repeatBinary', 'topologicalDomainBinary', 'caBindingBinary',
+                           'bindingSiteBinary', 'regionBinary', 'signalPeptideBinary',
+                           'modifiedResidueBinary', 'zincFingerBinary', 'motifBinary',
+                           'coiledCoilBinary', 'peptideBinary', 'transitPeptideBinary',
+                           'glycosylationBinary', 'propeptideBinary']
 def get_pdb_ids(protein_id):
     try:
         request = IdMappingClient.submit(
             source="UniProtKB_AC-ID", dest="PDB", ids={protein_id})
-
-        try:
-            pdb_list = list(request.each_result())
-            time.sleep(1)
-            return [i['to'] for i in pdb_list]
-        except unipressed.id_mapping.core.IdMappingError:
-            print('I AM HERE 1')
-            get_pdb_ids_2(protein_id)
+        pdb_list = list(request.each_result())
+        return [i['to'] for i in pdb_list]
     except requests.exceptions.HTTPError:
-        print('I AM HERE 2')
-        get_pdb_ids_2(protein_id)
+        return []
+    except unipressed.id_mapping.core.IdMappingError:
+        print('IdMappingError caused by UniProt API service, please try later.')
+        return []
     except KeyError:
-        print('I AM HERE 3')
-        get_pdb_ids_2(protein_id)
-"""
-
+        return []
+
+
+def fix_filename(filename):
+    try:
+        if Path(filename).suffix == '.pdb':
+            pass
+        elif Path(filename).stem.endswith("ent"):
+            filename_replace_ext = filename.with_name( Path(filename).stem[3:])
+            Path(filename).rename(filename_replace_ext.with_suffix('.pdb'))
+        elif Path(filename).stem.startswith("pdb"):
+            filename_replace_ext = Path(filename).with_name(Path(filename).stem[3:])
+            Path(filename).rename(filename_replace_ext.with_suffix('.pdb'))
+        else:
+            filename_replace_ext = filename.with_suffix(".pdb")
+            Path(filename).rename(filename_replace_ext)
+
+    except:
+        FileNotFoundError
+
+
+
+def fetch_uniprot_ids(pdb_code):
+    response = requests.get(f"https://www.ebi.ac.uk/pdbe/api/mappings/uniprot/{pdb_code}")
+    response.raise_for_status()
+    resp = response.json()
+    return list(list(list(resp.values())[0].values())[0].keys())
+
+def addPDBinfo(data, path_to_output_files):
+    # pdb_fasta = pd.DataFrame(columns=['pdbID', 'chain', 'pdbSequence'])
+    pdb_info = pd.DataFrame(columns=['uniprotID', 'pdbID', 'chain', 'resolution'])
+    print('Retrieving PDB structures...\n')
+    up_list = data.uniprotID.to_list()
+    pdbs = [get_pdb_ids(i) for i in up_list]
+
+    if len(pdbs) >= 1:
+        pdbs = [item for sublist in pdbs for item in sublist]
+        pdbs = list(filter(None, pdbs))
+        pdbs = set(pdbs)
+        pdbs = [i.lower() for i in pdbs]
+    else:
+        pdbs = []
+        print('No PDB structure found for the query. ')
+
+    print('\n>>Starting PDB structures download...\n')
+    print('\n>>Processing PDB structures...\n')
+    parser = PDBParser()
+    ppb = PPBuilder()
+
+    index = 0
+    for search in pdbs:
+        print(f'Searching for {search.upper()}')
+        try:
+            pdb_url = f"https://files.rcsb.org/download/{search}.pdb"
+            response = requests.get(pdb_url)
+            response.raise_for_status()  # Check for a successful response
+            pdb_data = response.text
+            pdb_parser = PDBParser(QUIET=True)  # QUIET=True suppresses warnings
+            pdb_file_content = StringIO(pdb_data)
+            structure = pdb_parser.get_structure(search, pdb_file_content)
+            pdb_data_list = pdb_data.split('\n')
+            pdb_data_list = [i for i in pdb_data_list if i.startswith('DBREF')]
+            pdb_data_list = [[list(filter(None, i.split(' '))) for j in i.split(' ') if j == 'UNP'] for
+                             i in pdb_data_list]
+            pdb_data_list = [i for i in pdb_data_list if i != []]
+            header = structure.header
+            for unp in pdb_data_list:
+                if (unp[0][5] == 'UNP') & (unp[0][6].split('-')[0] in up_list):
+                    pdb_info.at[index, 'uniprotID'] = unp[0][6].split('-')[0]
+                    pdb_info.at[index, 'pdbID'] = unp[0][1].upper()
+                    pdb_info.at[index, 'chain'] = unp[0][2].upper()
+                    pdb_info.at[index, 'resolution'] = header.get('resolution', 'N/A')
+                    pdb_info.at[index, 'start'] = unp[0][8]
+                    pdb_info.at[index, 'end'] = unp[0][9]
+                    index += 1
+        except:
+            continue
+    pdb_info.replace({'None': np.NaN}, inplace=True)
+    print('PDB file processing finished..')
+
+    return pdb_info
+from add_sasa import *
+
+
+
+def downloadPDB(pdbID, path_to_output_files):
+    pdbl = PDBList()
+    existing_pdb = list(Path(path_to_output_files / 'pdb_structures').glob("*"))
+    existing_pdb = [str(i) for i in existing_pdb]
+    existing_pdb = [i.split('/')[-1].split('.')[0].lower() for i in existing_pdb]
+    if pdbID not in existing_pdb:
+        # print(f'Downloading PDB file for {pdbID.upper()}..')
+        file = pdbl.retrieve_pdb_file(pdbID, pdir=Path(path_to_output_files / 'pdb_structures'), file_format="pdb")
+        fix_filename(file)
+        file = fix_filename(file)
+        file = Path(path_to_output_files / 'pdb_structures' / f'{pdbID}.pdb')
+    else:
+        print(f'PDB file for {pdbID.upper()} exists..')
+        file = Path(path_to_output_files / 'pdb_structures' / f'{pdbID}.pdb')
+        fix_filename(file)
+        file = fix_filename(file)
+
+    file = Path(path_to_output_files / 'pdb_structures' / f'{pdbID}.pdb')
+
+
+    existing_free_sasa = list(Path(path_to_output_files / 'freesasa_files').glob("*"))
+    existing_free_sasa = [str(i) for i in existing_free_sasa]
+    existing_free_sasa = [i.split('/')[-1].split('.')[0] for i in existing_free_sasa]
+    if pdbID not in existing_free_sasa:
+        run_freesasa(file, Path(path_to_output_files / 'freesasa_files' / f'{pdbID}.txt'), include_hetatms=True,
+                     outdir=None, force_rerun=False, file_type='pdb')
+
+    return file
+
+def processFile(data, path_to_output_files):
+    for i in data.index:
+        protein = data.at[i,'uniprotID']
+        pdbID = data.at[i,'pdbID'].lower()
+        chain = data.at[i,'chain']
+        pos = int(data.at[i, 'pos'])
+        wt = data.at[i, 'wt']
+
+        url = f'https://files.rcsb.org/download/{pdbID}.pdb'
+        response = requests.get(url)
+
+        if response.status_code == 200:
+            with open(f'{path_to_output_files}/pdb_structures/{pdbID}.pdb', 'w') as f:
+                f.write(response.text)
+            print(f"Downloaded {pdbID}.pdb successfully.")
+        else:
+            print(f"Failed to download {pdbID}.pdb. Status code: {response.status_code}")
+        file = Path(path_to_output_files / 'pdb_structures' / f'{pdbID}.pdb')
+
+
+        run_freesasa(file, Path(path_to_output_files / 'freesasa_files' / f'{pdbID}.txt'), include_hetatms=True,
+                     outdir=None, force_rerun=False, file_type='pdb')
+
+
+
+        filename = Path(path_to_output_files / 'freesasa_files' / f'{pdbID}.txt')
+        data.loc[i, 'sasa'] = sasa(protein, pos, wt, 1, filename, path_to_output_files, file_type='pdb')
+
+
+
+        newCol = {}
+        with open(file, encoding="utf8") as f:
+            for line in f.readlines():
+                if line[0:4].strip() == 'ATOM' and line[13:15].strip() == 'CA' and line[21].upper() == chain.upper():
+                    coords = [line[31:38].strip(), line[39:46].strip(), line[47:54].strip()]
+                    resnums_for_sasa = line[22:26].strip()
+                    newCol[resnums_for_sasa] = coords
+                elif line[0:4].strip() == 'ATOM' and line[13:15].strip() == 'CA' and line[21] == ' ':
+                    coords = [line[31:38].strip(), line[39:46].strip(), line[47:54].strip()]
+                    resnums_for_sasa = line[22:26].strip()
+                    newCol[resnums_for_sasa] = coords
+        data.at[i, 'coordinates'] = json.dumps(newCol)
+    return data
+
+def distance(x1, y1, z1, x2, y2, z2):
+    d = math.sqrt(math.pow(x2 - x1, 2) +
+                  math.pow(y2 - y1, 2) +
+                  math.pow(z2 - z1, 2) * 1.0)
+    return d
+
+
+def find_distance(coordMut, coordAnnot):
+    if coordMut != np.NaN:
+        try:
+            dist = distance(float(coordMut[0]), float(coordMut[1]), float(coordMut[2]), float(coordAnnot[0]),
+                            float(coordAnnot[1]), float(coordAnnot[2]))
+
+            return "%.2f" % dist
+        except:
+            ValueError
+            dist = 'nan'
+            return dist
+    else:
+        return np.NaN
+
+def domainDistance(domStart, domEnd, coordinates, mutationPosition, matchList, posOnPDB):
+    resList = list(range(domStart, domEnd))
+    domainDistanceList = []
+    for i in resList:
+        try:
+            domainPos = ast.literal_eval(matchList)[str(i)]
+            coordMut = coordinates[str(posOnPDB)]
+            coordDomain = coordinates[str(domainPos)]
+            distance = find_distance(coordMut, coordDomain)
+            domainDistanceList.append(distance)
+            return min(domainDistanceList)
+        except KeyError:
+            domainDistanceList = np.NaN
+            return np.NaN
+
+
+
+def match3D(data):
+    data.fillna(np.NaN, inplace=True)
+    for i in data.index:
+        coordinates = ast.literal_eval(data.at[i, 'coordinates'])
+        pos = str(data.at[i, 'pos'])
+        matchList = data.at[i, 'MATCHDICT']
+        try:
+            posOnPDB = ast.literal_eval(data.at[i, 'MATCHDICT'])[pos]
+            coordMut = coordinates[str(posOnPDB)]
+            if data.at[i, 'distance'] == -1000:
+                domStart = data.at[i, 'domStart']
+                domEnd = data.at[i, 'domEnd']
+                data.at[i, 'distance'] = domainDistance(domStart, domEnd, coordinates, pos, matchList, posOnPDB)
+        except KeyError:
+            posOnPDB = np.NaN
+            coordMut = np.NaN
+            data.at[i, 'distance'] = np.NaN
+
+
+        for col in UNIPROT_ANNOTATION_COLS[0:30]:
+            allDist = []
+            if (data.at[i, col] != np.NaN) & (data.at[i, col] != 'hit') & (data.at[i, col] != '[]') & (data.at[i, col] != []):
+                annotation_list = ast.literal_eval(data.at[i, col])
+                integer_list = [int(element) for element in annotation_list if element != 'null']
+                for annotPosition in integer_list:
+                    coordAnnot = coordinates[str(annotPosition)]
+                    distance = find_distance(coordMut, coordAnnot)
+                    allDist.append(distance)
+                if len(allDist) > 0:
+                    data.at[i, col] = min(allDist)
+    return data
+
+
+def domainDistanceModels(domStart, domEnd, coordinates, mutationPosition):
+    resList = list(range(domStart, domEnd))
+    domainDistanceList = []
+    for i in resList:
+        try:
+            coordMut = (coordinates)[mutationPosition]
+            coordDomain = (coordinates)[i]
+            distance = find_distance(coordMut, coordDomain)
+            domainDistanceList.append(distance)
+            return min(domainDistanceList)
+        except KeyError:
+            domainDistanceList = np.NaN
+            return np.NaN
+
+
+def match3DModels(data):
+    data.fillna(np.NaN, inplace=True)
+    for i in data.index:
+        pos = int(data.at[i, 'pos'])
+        coords = data.at[i, 'coordinates']
+        if type(coords) != dict:
+            coordinates = ast.literal_eval(coords)
+        else:
+            coordinates = coords
+            pass
+        coordMut = coordinates[pos]
+        if data.at[i, 'distance'] == -1000:
+            domStart = data.at[i, 'domStart']
+            domEnd = data.at[i, 'domEnd']
+            data.at[i, 'distance'] = domainDistanceModels(domStart, domEnd, coordinates, pos)
+        for col in UNIPROT_ANNOTATION_COLS[0:30]:
+            allDist = []
+            if (data.at[i, col] != np.NaN) & (data.at[i, col] != 'hit') & (data.at[i, col] != '[]') & (data.at[i, col] != []):
+                annotation_list = ast.literal_eval(data.at[i, col])
+                integer_list = [int(element) for element in annotation_list]
+                for annotPosition in integer_list:
+                    try:
+                        coordAnnot = coordinates[annotPosition]
+                    except KeyError:
+                        coordAnnot = []
+                    distance = find_distance(coordMut, coordAnnot)
+                    allDist.append(distance)
+
+                if len(allDist) > 0:
+                    allDist = [float(i) for i in allDist]
+                    data.at[i, col] = min(allDist)
+
+    return data
+
+
+def selectMaxAnnot(data):
+    if len(data) > 0:
+        for i in data.index:
+            total = 0
+            nanCounter = 0
+            hitCounter = 0
+            for col in UNIPROT_ANNOTATION_COLS[0:30]:
+                if (str(data.at[i,col]) != 'nan') and (data.at[i,col] != '[]' and (data.at[i,col] != 'hit')):
+                    total += float(data.at[i,col])
+                elif (str(data.at[i,col]) == 'nan') or (data.at[i,col] == '[]' and (data.at[i,col] != 'hit')):
+                    nanCounter += 1
+                if data.at[i,col] == 'hit':
+                    hitCounter += 1
+
+            if hitCounter > 0:
+                data.at[i, 'hitTotal'] = hitCounter
+            else:
+                data.at[i, 'hitTotal'] = np.NaN
+
+            if nanCounter != 30:
+                data.at[i, 'annotTotal'] = total
+            else:
+                data.at[i, 'annotTotal'] = np.NaN
+    else:
+        data['annotTotal'] = np.NaN
+
+    return data
+
+
+
+
+'''
 def get_pdb_ids(protein_id):
     try:
         request = IdMappingClient.submit(
@@ -210,4 +376,5 @@ def get_pdb_ids(protein_id):
         print('IdMappingError caused by UniProt API service, please try later.')
         return []
     except KeyError:
-        return []
+        return []
+'''
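
Note: a minimal sketch of the ID-mapping call that the new get_pdb_ids wraps, for trying it in isolation. It assumes unipressed is installed and the UniProt ID-mapping service is reachable; the accession P04637 (human p53) is only an illustrative input, not something used by this commit.

import time
from unipressed import IdMappingClient

# Submit a UniProt accession -> PDB ID mapping job, same source/dest as the diff.
request = IdMappingClient.submit(
    source="UniProtKB_AC-ID", dest="PDB", ids={"P04637"})
time.sleep(1)  # the mapping job is asynchronous; give it a moment before reading
print([record["to"] for record in request.each_result()][:5])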
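The triple list(...) indexing in fetch_uniprot_ids is easier to read against the shape of the PDBe response. A sketch with a hand-written stand-in payload; this shape is inferred from the indexing in the commit, not from API documentation, so treat it as an assumption.

# Stand-in for the JSON returned by
# https://www.ebi.ac.uk/pdbe/api/mappings/uniprot/{pdb_code}
resp = {
    "1a1u": {                # outer key: the queried PDB code
        "UniProt": {         # mapping category
            "P04637": {"identifier": "...", "mappings": ["..."]},
        }
    }
}
# list(resp.values())[0]  -> the per-PDB-code dict
# .values())[0]           -> the {accession: details} dict
# .keys()                 -> the mapped UniProt accessions
print(list(list(list(resp.values())[0].values())[0].keys()))  # ['P04637']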
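The slices in processFile follow the fixed-width PDB ATOM record layout (atom name around columns 14-15, chain ID at column 22, residue number at columns 23-26, x/y/z at columns 31-54, all 1-based; the Python slices are 0-based). A self-contained check against a fabricated record:

# Fabricated ATOM record, laid out on standard PDB columns.
line = ("ATOM      2  CA  MET A   1      38.428  13.104   6.364"
        "  1.00 54.69           C")
if line[0:4].strip() == 'ATOM' and line[13:15].strip() == 'CA':
    chain = line[21]              # chain identifier
    resnum = line[22:26].strip()  # residue sequence number
    coords = [line[31:38].strip(), line[39:46].strip(), line[47:54].strip()]
    print(chain, resnum, coords)  # A 1 ['38.428', '13.104', '6.364']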
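And a quick sanity check of the Euclidean distance helper (the trailing * 1.0 in the committed version is a no-op; this is the same formula):

import math

def distance(x1, y1, z1, x2, y2, z2):
    # sqrt((x2-x1)^2 + (y2-y1)^2 + (z2-z1)^2)
    return math.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2 + (z2 - z1) ** 2)

print(distance(0.0, 0.0, 0.0, 2.0, 3.0, 6.0))  # 7.0, since 4 + 9 + 36 = 49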