fatmacankara committed on
Commit
c497936
·
1 Parent(s): fdfbb79

Update code/pdb_featureVector.py

Browse files
Files changed (1) hide show
  1. code/pdb_featureVector.py +4 -1
code/pdb_featureVector.py CHANGED
@@ -286,6 +286,7 @@ def pdb(input_set, mode, impute):
286
 
287
  uniprot_matched = uniprot_matched.merge(pdb_fasta, on=['pdbID', 'chain'], how='left')
288
  uniprot_matched = uniprot_matched.astype(str)
 
289
 
290
  with_pdb = uniprot_matched[(uniprot_matched.pdbID != 'nan') & (
291
  (uniprot_matched.resolution != 'nan') & (uniprot_matched.resolution != 'OT') & (
@@ -295,11 +296,12 @@ def pdb(input_set, mode, impute):
295
  uniprot_matched.resolution == 'None'))]
296
  no_pdb = no_pdb[~no_pdb.datapoint.isin(with_pdb.datapoint.to_list())]
297
  no_pdb.drop(columns=['chain', 'pdbID', 'pdbSequence', 'resolution'], inplace=True)
298
-
299
  print(
300
  'PDB Information successfully added...\nPDB structures are found for %d of %d.\n%d of %d failed to match with PDB structure.\n'
301
  % (len(with_pdb.drop_duplicates(['datapoint'])), len(uniprot_matched.drop_duplicates(['datapoint'])),
302
  len(no_pdb.drop_duplicates(['datapoint'])), len(uniprot_matched.drop_duplicates(['datapoint']))))
 
303
 
304
  with_pdb = with_pdb.sort_values(['uniprotID', 'resolution'], axis=0, ascending=True)
305
  with_pdb = with_pdb.drop_duplicates(['uniprotID', 'wt', 'mut', 'pos', 'pdbSequence'], keep='first')
@@ -378,6 +380,7 @@ def pdb(input_set, mode, impute):
378
  # Isoform matches, i.e. labelled as i, isoform sequences will be aligned with PDB sequences.
379
  with_pdb['uniprotSequence'] = with_pdb['uniprotSequence'].str.replace('U', 'C')
380
  with_pdb['pdbSequence'] = with_pdb['pdbSequence'].str.replace('U', 'C')
 
381
 
382
  dfM = with_pdb[with_pdb.wt_sequence_match == 'm']
383
  dfM = dfM.sort_values(['uniprotID', 'resolution'], axis=0, ascending=True)
 
286
 
287
  uniprot_matched = uniprot_matched.merge(pdb_fasta, on=['pdbID', 'chain'], how='left')
288
  uniprot_matched = uniprot_matched.astype(str)
289
+ st.write('uniprot_matched', uniprot_matched)
290
 
291
  with_pdb = uniprot_matched[(uniprot_matched.pdbID != 'nan') & (
292
  (uniprot_matched.resolution != 'nan') & (uniprot_matched.resolution != 'OT') & (
 
296
  uniprot_matched.resolution == 'None'))]
297
  no_pdb = no_pdb[~no_pdb.datapoint.isin(with_pdb.datapoint.to_list())]
298
  no_pdb.drop(columns=['chain', 'pdbID', 'pdbSequence', 'resolution'], inplace=True)
299
+ st.write('with_pdb', with_pdb)
300
  print(
301
  'PDB Information successfully added...\nPDB structures are found for %d of %d.\n%d of %d failed to match with PDB structure.\n'
302
  % (len(with_pdb.drop_duplicates(['datapoint'])), len(uniprot_matched.drop_duplicates(['datapoint'])),
303
  len(no_pdb.drop_duplicates(['datapoint'])), len(uniprot_matched.drop_duplicates(['datapoint']))))
304
+ st.write('with_pdb1', with_pdb)
305
 
306
  with_pdb = with_pdb.sort_values(['uniprotID', 'resolution'], axis=0, ascending=True)
307
  with_pdb = with_pdb.drop_duplicates(['uniprotID', 'wt', 'mut', 'pos', 'pdbSequence'], keep='first')
 
380
  # Isoform matches, i.e. labelled as i, isoform sequences will be aligned with PDB sequences.
381
  with_pdb['uniprotSequence'] = with_pdb['uniprotSequence'].str.replace('U', 'C')
382
  with_pdb['pdbSequence'] = with_pdb['pdbSequence'].str.replace('U', 'C')
383
+ st.write('with_pdb2', with_pdb)
384
 
385
  dfM = with_pdb[with_pdb.wt_sequence_match == 'm']
386
  dfM = dfM.sort_values(['uniprotID', 'resolution'], axis=0, ascending=True)