fatmacankara committed on
Commit
ad9add7
·
1 Parent(s): f44aa18

Update code/modbaseModelAdd.py

Browse files
Files changed (1) hide show
  1. code/modbaseModelAdd.py +16 -6
code/modbaseModelAdd.py CHANGED
@@ -10,7 +10,6 @@ def addModbaseModels(dataframe, path_to_input_files, path_to_output_files):
10
  # GET MODBASE MODELS
11
  # Get IDs from data to retrieve only their models from MODBASE
12
  dataframe.reset_index(inplace=True, drop=True)
13
-
14
  existing_modbase_models = list(Path(path_to_output_files / 'modbase_structures').glob("*"))
15
  existing_modbase_models = [str(i) for i in existing_modbase_models]
16
  existing_modbase_models = [i.split('/')[-1].split('.')[0] for i in existing_modbase_models]
@@ -32,11 +31,15 @@ def addModbaseModels(dataframe, path_to_input_files, path_to_output_files):
32
  existing_free_sasa = list(Path(path_to_output_files / 'freesasa_files').glob("*"))
33
  existing_free_sasa = [str(i) for i in existing_free_sasa]
34
  existing_free_sasa = [i.split('/')[-1].split('.')[0] for i in existing_free_sasa]
 
35
  for i in dataframe.index:
36
  coordDict = {}
37
  protein = dataframe.at[i, 'uniprotID']
38
  varPos = int(dataframe.at[i, 'pos'])
39
  wt = dataframe.at[i, 'wt']
 
 
 
40
  if protein not in existing_modbase_models:
41
  print('Downloading Modbase models for ', protein)
42
  url = 'https://salilab.org/modbase/retrieve/modbase/?databaseID=' + protein
@@ -104,7 +107,7 @@ def addModbaseModels(dataframe, path_to_input_files, path_to_output_files):
104
  'model_id': model_id, 'coordinates': coordDict,
105
  'AAonPDB': AAonPDB, 'coordVAR': coordVAR}
106
  modbase_reduced = modbase_reduced.append(new_row, ignore_index=True)
107
- modbase_reduced = modbase_reduced[['uniprotID', 'quality_score', 'model_id', 'coordinates', 'AAonPDB', 'coordVAR']]
108
  modbase = dataframe.merge(modbase_reduced, on='uniprotID', how='left')
109
  modbase.quality_score = modbase.quality_score.astype(float)
110
  modbase = modbase.sort_values(by=['datapoint', 'quality_score'], ascending=False)
@@ -119,13 +122,20 @@ def addModbaseModels(dataframe, path_to_input_files, path_to_output_files):
119
  '': np.NaN}, inplace=True)
120
  except NameError:
121
  print('This file doesnt have Quality Score. Replacer: -999', model_id)
 
 
 
 
 
122
  else:
123
- k = pd.Series(
124
- dataframe.loc[i])
125
- no_modbase = no_modbase.append(k, ignore_index=True)
 
 
126
 
127
  no_modbase_no_Coord = modbase[pd.isna(modbase['coordVAR'])]
128
  no_modbase = pd.concat([no_modbase, no_modbase_no_Coord])
129
  modbase = modbase[~pd.isna(modbase['coordVAR'])]
130
-
131
  return modbase, no_modbase
 
10
  # GET MODBASE MODELS
11
  # Get IDs from data to retrieve only their models from MODBASE
12
  dataframe.reset_index(inplace=True, drop=True)
 
13
  existing_modbase_models = list(Path(path_to_output_files / 'modbase_structures').glob("*"))
14
  existing_modbase_models = [str(i) for i in existing_modbase_models]
15
  existing_modbase_models = [i.split('/')[-1].split('.')[0] for i in existing_modbase_models]
 
31
  existing_free_sasa = list(Path(path_to_output_files / 'freesasa_files').glob("*"))
32
  existing_free_sasa = [str(i) for i in existing_free_sasa]
33
  existing_free_sasa = [i.split('/')[-1].split('.')[0] for i in existing_free_sasa]
34
+ keep_cols = dataframe.columns
35
  for i in dataframe.index:
36
  coordDict = {}
37
  protein = dataframe.at[i, 'uniprotID']
38
  varPos = int(dataframe.at[i, 'pos'])
39
  wt = dataframe.at[i, 'wt']
40
+ mut = dataframe.at[i, 'mut']
41
+ datapoint = dataframe.at[i, 'datapoint']
42
+
43
  if protein not in existing_modbase_models:
44
  print('Downloading Modbase models for ', protein)
45
  url = 'https://salilab.org/modbase/retrieve/modbase/?databaseID=' + protein
 
107
  'model_id': model_id, 'coordinates': coordDict,
108
  'AAonPDB': AAonPDB, 'coordVAR': coordVAR}
109
  modbase_reduced = modbase_reduced.append(new_row, ignore_index=True)
110
+ modbase_reduced = modbase_reduced[['uniprotID', 'quality_score', 'model_id', 'coordinates', 'AAonPDB', 'coordVAR']]
111
  modbase = dataframe.merge(modbase_reduced, on='uniprotID', how='left')
112
  modbase.quality_score = modbase.quality_score.astype(float)
113
  modbase = modbase.sort_values(by=['datapoint', 'quality_score'], ascending=False)
 
122
  '': np.NaN}, inplace=True)
123
  except NameError:
124
  print('This file doesnt have Quality Score. Replacer: -999', model_id)
125
+ else:
126
+ new_row = {'uniprotID': uniprot_id, 'wt': wt,
127
+ 'pos': varPos, 'mut': mut, 'datapoint': datapoint }
128
+ no_modbase = no_modbase.append(new_row, ignore_index=True)
129
+
130
  else:
131
+ new_row = {'uniprotID': uniprot_id, 'wt': wt,
132
+ 'pos': varPos, 'mut': mut, 'datapoint': datapoint }
133
+ no_modbase = no_modbase.append(new_row, ignore_index=True)
134
+
135
+
136
 
137
  no_modbase_no_Coord = modbase[pd.isna(modbase['coordVAR'])]
138
  no_modbase = pd.concat([no_modbase, no_modbase_no_Coord])
139
  modbase = modbase[~pd.isna(modbase['coordVAR'])]
140
+ no_modbase = no_modbase[keep_cols]
141
  return modbase, no_modbase