fatmacankara committed on
Commit
f9741db
·
1 Parent(s): 55f11f5

Update code/pdb_featureVector.py

Browse files
Files changed (1) hide show
  1. code/pdb_featureVector.py +8 -7
code/pdb_featureVector.py CHANGED
@@ -95,7 +95,7 @@ def pdb(input_set, mode, impute):
95
  data.domStart = data.domStart.replace({'nan': '-1'})
96
  data.domEnd = data.domEnd.replace({'nan': '-1'})
97
  data.distance = data.distance.replace({'nan': '-1'})
98
-
99
  """
100
  STEP 4
101
  Retrieve canonical and isoform UniProt sequences.
@@ -197,6 +197,7 @@ def pdb(input_set, mode, impute):
197
  else:
198
  pdbs = []
199
  print('Processing PDB structures...\n')
 
200
  if pdbs == []:
201
  print('No PDB structure found for the query. ')
202
  print('Starting PDB structures download...\n')
@@ -297,7 +298,7 @@ def pdb(input_set, mode, impute):
297
  filename.rename(filename_replace_ext.with_suffix('.pdb'))
298
  except:
299
  FileNotFoundError
300
-
301
  uniprot_matched = pd.merge(uniprot_matched, pdb_info, on='uniprotID', how='left')
302
  uniprot_matched = uniprot_matched.astype(str)
303
  uniprot_matched = uniprot_matched.drop_duplicates()
@@ -402,7 +403,7 @@ def pdb(input_set, mode, impute):
402
  dfNM = dfNM.sort_values(['uniprotID', 'resolution'], axis=0, ascending=True)
403
  dfNM = dfNM.drop_duplicates(['uniprotID', 'wt', 'mut', 'pos', 'pdbSequence'], keep='first')
404
  dfNM.rename(columns={'isoformSequence': 'uniprotSequence'}, inplace=True)
405
-
406
  dfM = dfM.astype(str)
407
  dfNM = dfNM.astype(str)
408
 
@@ -493,7 +494,7 @@ def pdb(input_set, mode, impute):
493
 
494
  print('Proceeding to SwissModel search...')
495
  print('------------------------------------\n')
496
-
497
  # At this point we have 4 dataframes
498
  # 1. after_up_pdb_alignment --- This is after PDB sequence alignment. There may be mutations that weren't matched after the alignment. They will be searched in other databases as well.
499
  # 1a. aligned --- we are done with this.
@@ -592,7 +593,7 @@ def pdb(input_set, mode, impute):
592
 
593
  with_swiss_models = pd.concat([to_swiss, no_swiss_models]).drop_duplicates(['datapoint'], keep=False)
594
  with_swiss_models = with_swiss_models[to_swiss.columns]
595
-
596
  # Add model info.
597
 
598
  with_swiss_models = with_swiss_models.astype(str)
@@ -698,7 +699,7 @@ def pdb(input_set, mode, impute):
698
  swissmodels_fasta = pd.DataFrame(columns=['uniprotID', 'template', 'qmean_norm', 'chain', 'fasta'])
699
  else:
700
  swissmodels_fasta.columns = ['uniprotID', 'template', 'qmean_norm', 'chain', 'fasta']
701
-
702
  swissmodels_fasta = swissmodels_fasta.astype(str)
703
 
704
  swiss_models_with_data.qmean_norm = swiss_models_with_data.qmean_norm.astype(float)
@@ -813,7 +814,7 @@ def pdb(input_set, mode, impute):
813
  to_swiss_columns = to_swiss.columns
814
  to_swiss_size = len(to_swiss.drop_duplicates(['datapoint']))
815
  to_swiss = None
816
-
817
  # CONTROL
818
 
819
  """
 
95
  data.domStart = data.domStart.replace({'nan': '-1'})
96
  data.domEnd = data.domEnd.replace({'nan': '-1'})
97
  data.distance = data.distance.replace({'nan': '-1'})
98
+ st.write('1')
99
  """
100
  STEP 4
101
  Retrieve canonical and isoform UniProt sequences.
 
197
  else:
198
  pdbs = []
199
  print('Processing PDB structures...\n')
200
+ st.write('2')
201
  if pdbs == []:
202
  print('No PDB structure found for the query. ')
203
  print('Starting PDB structures download...\n')
 
298
  filename.rename(filename_replace_ext.with_suffix('.pdb'))
299
  except:
300
  FileNotFoundError
301
+ st.write('3')
302
  uniprot_matched = pd.merge(uniprot_matched, pdb_info, on='uniprotID', how='left')
303
  uniprot_matched = uniprot_matched.astype(str)
304
  uniprot_matched = uniprot_matched.drop_duplicates()
 
403
  dfNM = dfNM.sort_values(['uniprotID', 'resolution'], axis=0, ascending=True)
404
  dfNM = dfNM.drop_duplicates(['uniprotID', 'wt', 'mut', 'pos', 'pdbSequence'], keep='first')
405
  dfNM.rename(columns={'isoformSequence': 'uniprotSequence'}, inplace=True)
406
+ st.write('4')
407
  dfM = dfM.astype(str)
408
  dfNM = dfNM.astype(str)
409
 
 
494
 
495
  print('Proceeding to SwissModel search...')
496
  print('------------------------------------\n')
497
+ st.write('5')
498
  # At this point we have 4 dataframes
499
  # 1. after_up_pdb_alignment --- This is after PDB sequence alignment. There may be mutations that weren't matched after the alignment. They will be searched in other databases as well.
500
  # 1a. aligned --- we are done with this.
 
593
 
594
  with_swiss_models = pd.concat([to_swiss, no_swiss_models]).drop_duplicates(['datapoint'], keep=False)
595
  with_swiss_models = with_swiss_models[to_swiss.columns]
596
+ st.write('6')
597
  # Add model info.
598
 
599
  with_swiss_models = with_swiss_models.astype(str)
 
699
  swissmodels_fasta = pd.DataFrame(columns=['uniprotID', 'template', 'qmean_norm', 'chain', 'fasta'])
700
  else:
701
  swissmodels_fasta.columns = ['uniprotID', 'template', 'qmean_norm', 'chain', 'fasta']
702
+ st.write('7')
703
  swissmodels_fasta = swissmodels_fasta.astype(str)
704
 
705
  swiss_models_with_data.qmean_norm = swiss_models_with_data.qmean_norm.astype(float)
 
814
  to_swiss_columns = to_swiss.columns
815
  to_swiss_size = len(to_swiss.drop_duplicates(['datapoint']))
816
  to_swiss = None
817
+ st.write('8')
818
  # CONTROL
819
 
820
  """