Spaces:
Sleeping
Sleeping
Commit
·
92d57b2
1
Parent(s):
c975dff
Update code/pdb_featureVector.py
Browse files- code/pdb_featureVector.py +4 -35
code/pdb_featureVector.py
CHANGED
@@ -163,8 +163,7 @@ def pdb(input_set, mode, impute):
|
|
163 |
data.at[i, 'wt_sequence_match'] = 'i'
|
164 |
data.at[i, 'whichIsoform'] = whichIsoform
|
165 |
break
|
166 |
-
|
167 |
-
st.write(data)
|
168 |
data.wt_sequence_match = data.wt_sequence_match.astype('str')
|
169 |
data.replace({'': 'nan'}, inplace=True)
|
170 |
data_size = len(data.drop_duplicates(['datapoint']))
|
@@ -288,12 +287,6 @@ def pdb(input_set, mode, impute):
|
|
288 |
pdb_info.at[index, 'chain'] = chain_id
|
289 |
pdb_info.at[index, 'resolution'] = resolution
|
290 |
index += 1
|
291 |
-
|
292 |
-
print()
|
293 |
-
st.write('PDB INFO')
|
294 |
-
st.write(pdb_info)
|
295 |
-
st.write('PDB FASTA')
|
296 |
-
st.write('pdb_info')
|
297 |
print('PDB file processing finished..')
|
298 |
for filename in list(Path(path_to_output_files / 'pdb_structures').glob("*")):
|
299 |
try:
|
@@ -432,24 +425,12 @@ def pdb(input_set, mode, impute):
|
|
432 |
existing_pdb = None
|
433 |
with_pdb_size = len(with_pdb.drop_duplicates(['datapoint']))
|
434 |
with_pdb = None
|
435 |
-
|
436 |
-
st.write(dfM)
|
437 |
-
st.write('dfNM')
|
438 |
-
st.write(dfNM)
|
439 |
|
440 |
print('Aligning sequences...\n')
|
441 |
aligned_m = final_stage(dfM, annotation_list, Path(path_to_output_files / 'alignment_files'))
|
442 |
aligned_nm = final_stage(dfNM, annotation_list, Path(path_to_output_files / 'alignment_files'))
|
443 |
|
444 |
-
|
445 |
-
st.write('aligned_m')
|
446 |
-
st.write(aligned_m.iloc[0])
|
447 |
-
st.write(aligned_m.iloc[1])
|
448 |
-
st.write(aligned_m.iloc[2])
|
449 |
-
st.write(aligned_m.iloc[3])
|
450 |
-
st.write(aligned_m.iloc[4])
|
451 |
-
st.write(aligned_m.iloc[5])
|
452 |
-
|
453 |
|
454 |
|
455 |
|
@@ -493,8 +474,7 @@ def pdb(input_set, mode, impute):
|
|
493 |
yes_pdb_no_match = after_up_pdb_alignment[
|
494 |
(after_up_pdb_alignment.pdbID != 'nan') & (after_up_pdb_alignment.mutationPositionOnPDB == 'nan')]
|
495 |
no_pdb = no_pdb.copy()
|
496 |
-
|
497 |
-
st.write(pdb_aligned)
|
498 |
print('PDB matching is completed...\n')
|
499 |
print('SUMMARY')
|
500 |
print('-------')
|
@@ -895,7 +875,6 @@ def pdb(input_set, mode, impute):
|
|
895 |
if protein not in existing_modbase_models:
|
896 |
print('Downloading Modbase models for ', protein)
|
897 |
url = 'https://salilab.org/modbase/retrieve/modbase/?databaseID=' + protein
|
898 |
-
print(url)
|
899 |
req = requests.get(url)
|
900 |
name = path_to_output_files / 'modbase_structures' / f'{protein}.txt'
|
901 |
with open(name, 'wb') as f:
|
@@ -1403,11 +1382,9 @@ def pdb(input_set, mode, impute):
|
|
1403 |
data['uniprotSequence'] = data['uniprotSequence'].str.replace('U', 'C')
|
1404 |
data['pdbSequence'] = data['pdbSequence'].str.replace('U', 'C')
|
1405 |
for i in data.index:
|
1406 |
-
st.write('iiiii',i)
|
1407 |
id_ = data.at[i, 'pdbID'].lower()
|
1408 |
up_id_ = data.at[i, 'uniprotID']
|
1409 |
score_ = str(data.at[i, 'score'])
|
1410 |
-
st.write(id_, up_id_, score_)
|
1411 |
"""
|
1412 |
if data.at[i, 'source'] == 'PDB':
|
1413 |
pdb_path = Path(path_to_output_files / 'pdb_structures' / f'{id_}.pdb')
|
@@ -1421,15 +1398,7 @@ def pdb(input_set, mode, impute):
|
|
1421 |
chain = data.at[i, 'chain']
|
1422 |
uniprotID = data.at[i, 'uniprotID']
|
1423 |
pdbID = data.at[i, 'pdbID']
|
1424 |
-
|
1425 |
-
st.write(source)
|
1426 |
-
st.write(chain)
|
1427 |
-
st.write(uniprotID)
|
1428 |
-
st.write(pdbID)
|
1429 |
-
st.write(uniprotID)
|
1430 |
-
st.write('INputs')
|
1431 |
-
st.write(uniprotID, pdbSequence, source, chain, pdbID, mode )
|
1432 |
-
|
1433 |
alignments = get_alignments_3D(uniprotID, 'nan', pdbSequence, source, chain, pdbID, mode,
|
1434 |
file_format='gzip')
|
1435 |
mutPos = data.at[i, 'mutationPositionOnPDB']
|
|
|
163 |
data.at[i, 'wt_sequence_match'] = 'i'
|
164 |
data.at[i, 'whichIsoform'] = whichIsoform
|
165 |
break
|
166 |
+
|
|
|
167 |
data.wt_sequence_match = data.wt_sequence_match.astype('str')
|
168 |
data.replace({'': 'nan'}, inplace=True)
|
169 |
data_size = len(data.drop_duplicates(['datapoint']))
|
|
|
287 |
pdb_info.at[index, 'chain'] = chain_id
|
288 |
pdb_info.at[index, 'resolution'] = resolution
|
289 |
index += 1
|
|
|
|
|
|
|
|
|
|
|
|
|
290 |
print('PDB file processing finished..')
|
291 |
for filename in list(Path(path_to_output_files / 'pdb_structures').glob("*")):
|
292 |
try:
|
|
|
425 |
existing_pdb = None
|
426 |
with_pdb_size = len(with_pdb.drop_duplicates(['datapoint']))
|
427 |
with_pdb = None
|
428 |
+
|
|
|
|
|
|
|
429 |
|
430 |
print('Aligning sequences...\n')
|
431 |
aligned_m = final_stage(dfM, annotation_list, Path(path_to_output_files / 'alignment_files'))
|
432 |
aligned_nm = final_stage(dfNM, annotation_list, Path(path_to_output_files / 'alignment_files'))
|
433 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
434 |
|
435 |
|
436 |
|
|
|
474 |
yes_pdb_no_match = after_up_pdb_alignment[
|
475 |
(after_up_pdb_alignment.pdbID != 'nan') & (after_up_pdb_alignment.mutationPositionOnPDB == 'nan')]
|
476 |
no_pdb = no_pdb.copy()
|
477 |
+
|
|
|
478 |
print('PDB matching is completed...\n')
|
479 |
print('SUMMARY')
|
480 |
print('-------')
|
|
|
875 |
if protein not in existing_modbase_models:
|
876 |
print('Downloading Modbase models for ', protein)
|
877 |
url = 'https://salilab.org/modbase/retrieve/modbase/?databaseID=' + protein
|
|
|
878 |
req = requests.get(url)
|
879 |
name = path_to_output_files / 'modbase_structures' / f'{protein}.txt'
|
880 |
with open(name, 'wb') as f:
|
|
|
1382 |
data['uniprotSequence'] = data['uniprotSequence'].str.replace('U', 'C')
|
1383 |
data['pdbSequence'] = data['pdbSequence'].str.replace('U', 'C')
|
1384 |
for i in data.index:
|
|
|
1385 |
id_ = data.at[i, 'pdbID'].lower()
|
1386 |
up_id_ = data.at[i, 'uniprotID']
|
1387 |
score_ = str(data.at[i, 'score'])
|
|
|
1388 |
"""
|
1389 |
if data.at[i, 'source'] == 'PDB':
|
1390 |
pdb_path = Path(path_to_output_files / 'pdb_structures' / f'{id_}.pdb')
|
|
|
1398 |
chain = data.at[i, 'chain']
|
1399 |
uniprotID = data.at[i, 'uniprotID']
|
1400 |
pdbID = data.at[i, 'pdbID']
|
1401 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1402 |
alignments = get_alignments_3D(uniprotID, 'nan', pdbSequence, source, chain, pdbID, mode,
|
1403 |
file_format='gzip')
|
1404 |
mutPos = data.at[i, 'mutationPositionOnPDB']
|