Spaces:
Running
Running
Commit
·
ad9add7
1
Parent(s):
f44aa18
Update code/modbaseModelAdd.py
Browse files- code/modbaseModelAdd.py +16 -6
code/modbaseModelAdd.py
CHANGED
@@ -10,7 +10,6 @@ def addModbaseModels(dataframe, path_to_input_files, path_to_output_files):
|
|
10 |
# GET MODBASE MODELS
|
11 |
# Get IDs from data to retrieve only their models from MODBASE
|
12 |
dataframe.reset_index(inplace=True, drop=True)
|
13 |
-
|
14 |
existing_modbase_models = list(Path(path_to_output_files / 'modbase_structures').glob("*"))
|
15 |
existing_modbase_models = [str(i) for i in existing_modbase_models]
|
16 |
existing_modbase_models = [i.split('/')[-1].split('.')[0] for i in existing_modbase_models]
|
@@ -32,11 +31,15 @@ def addModbaseModels(dataframe, path_to_input_files, path_to_output_files):
|
|
32 |
existing_free_sasa = list(Path(path_to_output_files / 'freesasa_files').glob("*"))
|
33 |
existing_free_sasa = [str(i) for i in existing_free_sasa]
|
34 |
existing_free_sasa = [i.split('/')[-1].split('.')[0] for i in existing_free_sasa]
|
|
|
35 |
for i in dataframe.index:
|
36 |
coordDict = {}
|
37 |
protein = dataframe.at[i, 'uniprotID']
|
38 |
varPos = int(dataframe.at[i, 'pos'])
|
39 |
wt = dataframe.at[i, 'wt']
|
|
|
|
|
|
|
40 |
if protein not in existing_modbase_models:
|
41 |
print('Downloading Modbase models for ', protein)
|
42 |
url = 'https://salilab.org/modbase/retrieve/modbase/?databaseID=' + protein
|
@@ -104,7 +107,7 @@ def addModbaseModels(dataframe, path_to_input_files, path_to_output_files):
|
|
104 |
'model_id': model_id, 'coordinates': coordDict,
|
105 |
'AAonPDB': AAonPDB, 'coordVAR': coordVAR}
|
106 |
modbase_reduced = modbase_reduced.append(new_row, ignore_index=True)
|
107 |
-
modbase_reduced = modbase_reduced[['uniprotID', 'quality_score', 'model_id', 'coordinates', 'AAonPDB', 'coordVAR']]
|
108 |
modbase = dataframe.merge(modbase_reduced, on='uniprotID', how='left')
|
109 |
modbase.quality_score = modbase.quality_score.astype(float)
|
110 |
modbase = modbase.sort_values(by=['datapoint', 'quality_score'], ascending=False)
|
@@ -119,13 +122,20 @@ def addModbaseModels(dataframe, path_to_input_files, path_to_output_files):
|
|
119 |
'': np.NaN}, inplace=True)
|
120 |
except NameError:
|
121 |
print('This file doesnt have Quality Score. Replacer: -999', model_id)
|
|
|
|
|
|
|
|
|
|
|
122 |
else:
|
123 |
-
|
124 |
-
|
125 |
-
no_modbase = no_modbase.append(
|
|
|
|
|
126 |
|
127 |
no_modbase_no_Coord = modbase[pd.isna(modbase['coordVAR'])]
|
128 |
no_modbase = pd.concat([no_modbase, no_modbase_no_Coord])
|
129 |
modbase = modbase[~pd.isna(modbase['coordVAR'])]
|
130 |
-
|
131 |
return modbase, no_modbase
|
|
|
10 |
# GET MODBASE MODELS
|
11 |
# Get IDs from data to retrieve only their models from MODBASE
|
12 |
dataframe.reset_index(inplace=True, drop=True)
|
|
|
13 |
existing_modbase_models = list(Path(path_to_output_files / 'modbase_structures').glob("*"))
|
14 |
existing_modbase_models = [str(i) for i in existing_modbase_models]
|
15 |
existing_modbase_models = [i.split('/')[-1].split('.')[0] for i in existing_modbase_models]
|
|
|
31 |
existing_free_sasa = list(Path(path_to_output_files / 'freesasa_files').glob("*"))
|
32 |
existing_free_sasa = [str(i) for i in existing_free_sasa]
|
33 |
existing_free_sasa = [i.split('/')[-1].split('.')[0] for i in existing_free_sasa]
|
34 |
+
keep_cols = dataframe.columns
|
35 |
for i in dataframe.index:
|
36 |
coordDict = {}
|
37 |
protein = dataframe.at[i, 'uniprotID']
|
38 |
varPos = int(dataframe.at[i, 'pos'])
|
39 |
wt = dataframe.at[i, 'wt']
|
40 |
+
mut = dataframe.at[i, 'mut']
|
41 |
+
datapoint = dataframe.at[i, 'datapoint']
|
42 |
+
|
43 |
if protein not in existing_modbase_models:
|
44 |
print('Downloading Modbase models for ', protein)
|
45 |
url = 'https://salilab.org/modbase/retrieve/modbase/?databaseID=' + protein
|
|
|
107 |
'model_id': model_id, 'coordinates': coordDict,
|
108 |
'AAonPDB': AAonPDB, 'coordVAR': coordVAR}
|
109 |
modbase_reduced = modbase_reduced.append(new_row, ignore_index=True)
|
110 |
+
modbase_reduced = modbase_reduced[['uniprotID', 'quality_score', 'model_id', 'coordinates', 'AAonPDB', 'coordVAR']]
|
111 |
modbase = dataframe.merge(modbase_reduced, on='uniprotID', how='left')
|
112 |
modbase.quality_score = modbase.quality_score.astype(float)
|
113 |
modbase = modbase.sort_values(by=['datapoint', 'quality_score'], ascending=False)
|
|
|
122 |
'': np.NaN}, inplace=True)
|
123 |
except NameError:
|
124 |
print('This file doesnt have Quality Score. Replacer: -999', model_id)
|
125 |
+
else:
|
126 |
+
new_row = {'uniprotID': uniprot_id, 'wt': wt,
|
127 |
+
'pos': varPos, 'mut': mut, 'datapoint': datapoint }
|
128 |
+
no_modbase = no_modbase.append(new_row, ignore_index=True)
|
129 |
+
|
130 |
else:
|
131 |
+
new_row = {'uniprotID': uniprot_id, 'wt': wt,
|
132 |
+
'pos': varPos, 'mut': mut, 'datapoint': datapoint }
|
133 |
+
no_modbase = no_modbase.append(new_row, ignore_index=True)
|
134 |
+
|
135 |
+
|
136 |
|
137 |
no_modbase_no_Coord = modbase[pd.isna(modbase['coordVAR'])]
|
138 |
no_modbase = pd.concat([no_modbase, no_modbase_no_Coord])
|
139 |
modbase = modbase[~pd.isna(modbase['coordVAR'])]
|
140 |
+
no_modbase = no_modbase[keep_cols]
|
141 |
return modbase, no_modbase
|