Spaces:
Sleeping
Sleeping
Commit
·
f9741db
1
Parent(s):
55f11f5
Update code/pdb_featureVector.py
Browse files
code/pdb_featureVector.py
CHANGED
@@ -95,7 +95,7 @@ def pdb(input_set, mode, impute):
|
|
95 |
data.domStart = data.domStart.replace({'nan': '-1'})
|
96 |
data.domEnd = data.domEnd.replace({'nan': '-1'})
|
97 |
data.distance = data.distance.replace({'nan': '-1'})
|
98 |
-
|
99 |
"""
|
100 |
STEP 4
|
101 |
Retrieve canonical and isoform UniProt sequences.
|
@@ -197,6 +197,7 @@ def pdb(input_set, mode, impute):
|
|
197 |
else:
|
198 |
pdbs = []
|
199 |
print('Processing PDB structures...\n')
|
|
|
200 |
if pdbs == []:
|
201 |
print('No PDB structure found for the query. ')
|
202 |
print('Starting PDB structures download...\n')
|
@@ -297,7 +298,7 @@ def pdb(input_set, mode, impute):
|
|
297 |
filename.rename(filename_replace_ext.with_suffix('.pdb'))
|
298 |
except:
|
299 |
FileNotFoundError
|
300 |
-
|
301 |
uniprot_matched = pd.merge(uniprot_matched, pdb_info, on='uniprotID', how='left')
|
302 |
uniprot_matched = uniprot_matched.astype(str)
|
303 |
uniprot_matched = uniprot_matched.drop_duplicates()
|
@@ -402,7 +403,7 @@ def pdb(input_set, mode, impute):
|
|
402 |
dfNM = dfNM.sort_values(['uniprotID', 'resolution'], axis=0, ascending=True)
|
403 |
dfNM = dfNM.drop_duplicates(['uniprotID', 'wt', 'mut', 'pos', 'pdbSequence'], keep='first')
|
404 |
dfNM.rename(columns={'isoformSequence': 'uniprotSequence'}, inplace=True)
|
405 |
-
|
406 |
dfM = dfM.astype(str)
|
407 |
dfNM = dfNM.astype(str)
|
408 |
|
@@ -493,7 +494,7 @@ def pdb(input_set, mode, impute):
|
|
493 |
|
494 |
print('Proceeding to SwissModel search...')
|
495 |
print('------------------------------------\n')
|
496 |
-
|
497 |
# At this point we have 4 dataframes
|
498 |
# 1. after_up_pdb_alignment --- This is the dataframe after PDB sequence alignment. Some mutations may not have been matched after the alignment; those will be searched in other databases as well.
|
499 |
# 1a. aligned --- we are done with this.
|
@@ -592,7 +593,7 @@ def pdb(input_set, mode, impute):
|
|
592 |
|
593 |
with_swiss_models = pd.concat([to_swiss, no_swiss_models]).drop_duplicates(['datapoint'], keep=False)
|
594 |
with_swiss_models = with_swiss_models[to_swiss.columns]
|
595 |
-
|
596 |
# Add model info.
|
597 |
|
598 |
with_swiss_models = with_swiss_models.astype(str)
|
@@ -698,7 +699,7 @@ def pdb(input_set, mode, impute):
|
|
698 |
swissmodels_fasta = pd.DataFrame(columns=['uniprotID', 'template', 'qmean_norm', 'chain', 'fasta'])
|
699 |
else:
|
700 |
swissmodels_fasta.columns = ['uniprotID', 'template', 'qmean_norm', 'chain', 'fasta']
|
701 |
-
|
702 |
swissmodels_fasta = swissmodels_fasta.astype(str)
|
703 |
|
704 |
swiss_models_with_data.qmean_norm = swiss_models_with_data.qmean_norm.astype(float)
|
@@ -813,7 +814,7 @@ def pdb(input_set, mode, impute):
|
|
813 |
to_swiss_columns = to_swiss.columns
|
814 |
to_swiss_size = len(to_swiss.drop_duplicates(['datapoint']))
|
815 |
to_swiss = None
|
816 |
-
|
817 |
# CONTROL
|
818 |
|
819 |
"""
|
|
|
95 |
data.domStart = data.domStart.replace({'nan': '-1'})
|
96 |
data.domEnd = data.domEnd.replace({'nan': '-1'})
|
97 |
data.distance = data.distance.replace({'nan': '-1'})
|
98 |
+
st.write('1')
|
99 |
"""
|
100 |
STEP 4
|
101 |
Retrieve canonical and isoform UniProt sequences.
|
|
|
197 |
else:
|
198 |
pdbs = []
|
199 |
print('Processing PDB structures...\n')
|
200 |
+
st.write('2')
|
201 |
if pdbs == []:
|
202 |
print('No PDB structure found for the query. ')
|
203 |
print('Starting PDB structures download...\n')
|
|
|
298 |
filename.rename(filename_replace_ext.with_suffix('.pdb'))
|
299 |
except:
|
300 |
FileNotFoundError
|
301 |
+
st.write('3')
|
302 |
uniprot_matched = pd.merge(uniprot_matched, pdb_info, on='uniprotID', how='left')
|
303 |
uniprot_matched = uniprot_matched.astype(str)
|
304 |
uniprot_matched = uniprot_matched.drop_duplicates()
|
|
|
403 |
dfNM = dfNM.sort_values(['uniprotID', 'resolution'], axis=0, ascending=True)
|
404 |
dfNM = dfNM.drop_duplicates(['uniprotID', 'wt', 'mut', 'pos', 'pdbSequence'], keep='first')
|
405 |
dfNM.rename(columns={'isoformSequence': 'uniprotSequence'}, inplace=True)
|
406 |
+
st.write('4')
|
407 |
dfM = dfM.astype(str)
|
408 |
dfNM = dfNM.astype(str)
|
409 |
|
|
|
494 |
|
495 |
print('Proceeding to SwissModel search...')
|
496 |
print('------------------------------------\n')
|
497 |
+
st.write('5')
|
498 |
# At this point we have 4 dataframes
|
499 |
# 1. after_up_pdb_alignment --- This is the dataframe after PDB sequence alignment. Some mutations may not have been matched after the alignment; those will be searched in other databases as well.
|
500 |
# 1a. aligned --- we are done with this.
|
|
|
593 |
|
594 |
with_swiss_models = pd.concat([to_swiss, no_swiss_models]).drop_duplicates(['datapoint'], keep=False)
|
595 |
with_swiss_models = with_swiss_models[to_swiss.columns]
|
596 |
+
st.write('6')
|
597 |
# Add model info.
|
598 |
|
599 |
with_swiss_models = with_swiss_models.astype(str)
|
|
|
699 |
swissmodels_fasta = pd.DataFrame(columns=['uniprotID', 'template', 'qmean_norm', 'chain', 'fasta'])
|
700 |
else:
|
701 |
swissmodels_fasta.columns = ['uniprotID', 'template', 'qmean_norm', 'chain', 'fasta']
|
702 |
+
st.write('7')
|
703 |
swissmodels_fasta = swissmodels_fasta.astype(str)
|
704 |
|
705 |
swiss_models_with_data.qmean_norm = swiss_models_with_data.qmean_norm.astype(float)
|
|
|
814 |
to_swiss_columns = to_swiss.columns
|
815 |
to_swiss_size = len(to_swiss.drop_duplicates(['datapoint']))
|
816 |
to_swiss = None
|
817 |
+
st.write('8')
|
818 |
# CONTROL
|
819 |
|
820 |
"""
|