fatmacankara committed on
Commit
92d57b2
·
1 Parent(s): c975dff

Update code/pdb_featureVector.py

Browse files
Files changed (1) hide show
  1. code/pdb_featureVector.py +4 -35
code/pdb_featureVector.py CHANGED
@@ -163,8 +163,7 @@ def pdb(input_set, mode, impute):
163
  data.at[i, 'wt_sequence_match'] = 'i'
164
  data.at[i, 'whichIsoform'] = whichIsoform
165
  break
166
- st.write('MATCHING UNIPTOR')
167
- st.write(data)
168
  data.wt_sequence_match = data.wt_sequence_match.astype('str')
169
  data.replace({'': 'nan'}, inplace=True)
170
  data_size = len(data.drop_duplicates(['datapoint']))
@@ -288,12 +287,6 @@ def pdb(input_set, mode, impute):
288
  pdb_info.at[index, 'chain'] = chain_id
289
  pdb_info.at[index, 'resolution'] = resolution
290
  index += 1
291
-
292
- print()
293
- st.write('PDB INFO')
294
- st.write(pdb_info)
295
- st.write('PDB FASTA')
296
- st.write('pdb_info')
297
  print('PDB file processing finished..')
298
  for filename in list(Path(path_to_output_files / 'pdb_structures').glob("*")):
299
  try:
@@ -432,24 +425,12 @@ def pdb(input_set, mode, impute):
432
  existing_pdb = None
433
  with_pdb_size = len(with_pdb.drop_duplicates(['datapoint']))
434
  with_pdb = None
435
- st.write('dfM')
436
- st.write(dfM)
437
- st.write('dfNM')
438
- st.write(dfNM)
439
 
440
  print('Aligning sequences...\n')
441
  aligned_m = final_stage(dfM, annotation_list, Path(path_to_output_files / 'alignment_files'))
442
  aligned_nm = final_stage(dfNM, annotation_list, Path(path_to_output_files / 'alignment_files'))
443
 
444
-
445
- st.write('aligned_m')
446
- st.write(aligned_m.iloc[0])
447
- st.write(aligned_m.iloc[1])
448
- st.write(aligned_m.iloc[2])
449
- st.write(aligned_m.iloc[3])
450
- st.write(aligned_m.iloc[4])
451
- st.write(aligned_m.iloc[5])
452
-
453
 
454
 
455
 
@@ -493,8 +474,7 @@ def pdb(input_set, mode, impute):
493
  yes_pdb_no_match = after_up_pdb_alignment[
494
  (after_up_pdb_alignment.pdbID != 'nan') & (after_up_pdb_alignment.mutationPositionOnPDB == 'nan')]
495
  no_pdb = no_pdb.copy()
496
- st.write('-----PDB ALIGNED-----')
497
- st.write(pdb_aligned)
498
  print('PDB matching is completed...\n')
499
  print('SUMMARY')
500
  print('-------')
@@ -895,7 +875,6 @@ def pdb(input_set, mode, impute):
895
  if protein not in existing_modbase_models:
896
  print('Downloading Modbase models for ', protein)
897
  url = 'https://salilab.org/modbase/retrieve/modbase/?databaseID=' + protein
898
- print(url)
899
  req = requests.get(url)
900
  name = path_to_output_files / 'modbase_structures' / f'{protein}.txt'
901
  with open(name, 'wb') as f:
@@ -1403,11 +1382,9 @@ def pdb(input_set, mode, impute):
1403
  data['uniprotSequence'] = data['uniprotSequence'].str.replace('U', 'C')
1404
  data['pdbSequence'] = data['pdbSequence'].str.replace('U', 'C')
1405
  for i in data.index:
1406
- st.write('iiiii',i)
1407
  id_ = data.at[i, 'pdbID'].lower()
1408
  up_id_ = data.at[i, 'uniprotID']
1409
  score_ = str(data.at[i, 'score'])
1410
- st.write(id_, up_id_, score_)
1411
  """
1412
  if data.at[i, 'source'] == 'PDB':
1413
  pdb_path = Path(path_to_output_files / 'pdb_structures' / f'{id_}.pdb')
@@ -1421,15 +1398,7 @@ def pdb(input_set, mode, impute):
1421
  chain = data.at[i, 'chain']
1422
  uniprotID = data.at[i, 'uniprotID']
1423
  pdbID = data.at[i, 'pdbID']
1424
- st.write(pdbSequence)
1425
- st.write(source)
1426
- st.write(chain)
1427
- st.write(uniprotID)
1428
- st.write(pdbID)
1429
- st.write(uniprotID)
1430
- st.write('INputs')
1431
- st.write(uniprotID, pdbSequence, source, chain, pdbID, mode )
1432
-
1433
  alignments = get_alignments_3D(uniprotID, 'nan', pdbSequence, source, chain, pdbID, mode,
1434
  file_format='gzip')
1435
  mutPos = data.at[i, 'mutationPositionOnPDB']
 
163
  data.at[i, 'wt_sequence_match'] = 'i'
164
  data.at[i, 'whichIsoform'] = whichIsoform
165
  break
166
+
 
167
  data.wt_sequence_match = data.wt_sequence_match.astype('str')
168
  data.replace({'': 'nan'}, inplace=True)
169
  data_size = len(data.drop_duplicates(['datapoint']))
 
287
  pdb_info.at[index, 'chain'] = chain_id
288
  pdb_info.at[index, 'resolution'] = resolution
289
  index += 1
 
 
 
 
 
 
290
  print('PDB file processing finished..')
291
  for filename in list(Path(path_to_output_files / 'pdb_structures').glob("*")):
292
  try:
 
425
  existing_pdb = None
426
  with_pdb_size = len(with_pdb.drop_duplicates(['datapoint']))
427
  with_pdb = None
428
+
 
 
 
429
 
430
  print('Aligning sequences...\n')
431
  aligned_m = final_stage(dfM, annotation_list, Path(path_to_output_files / 'alignment_files'))
432
  aligned_nm = final_stage(dfNM, annotation_list, Path(path_to_output_files / 'alignment_files'))
433
 
 
 
 
 
 
 
 
 
 
434
 
435
 
436
 
 
474
  yes_pdb_no_match = after_up_pdb_alignment[
475
  (after_up_pdb_alignment.pdbID != 'nan') & (after_up_pdb_alignment.mutationPositionOnPDB == 'nan')]
476
  no_pdb = no_pdb.copy()
477
+
 
478
  print('PDB matching is completed...\n')
479
  print('SUMMARY')
480
  print('-------')
 
875
  if protein not in existing_modbase_models:
876
  print('Downloading Modbase models for ', protein)
877
  url = 'https://salilab.org/modbase/retrieve/modbase/?databaseID=' + protein
 
878
  req = requests.get(url)
879
  name = path_to_output_files / 'modbase_structures' / f'{protein}.txt'
880
  with open(name, 'wb') as f:
 
1382
  data['uniprotSequence'] = data['uniprotSequence'].str.replace('U', 'C')
1383
  data['pdbSequence'] = data['pdbSequence'].str.replace('U', 'C')
1384
  for i in data.index:
 
1385
  id_ = data.at[i, 'pdbID'].lower()
1386
  up_id_ = data.at[i, 'uniprotID']
1387
  score_ = str(data.at[i, 'score'])
 
1388
  """
1389
  if data.at[i, 'source'] == 'PDB':
1390
  pdb_path = Path(path_to_output_files / 'pdb_structures' / f'{id_}.pdb')
 
1398
  chain = data.at[i, 'chain']
1399
  uniprotID = data.at[i, 'uniprotID']
1400
  pdbID = data.at[i, 'pdbID']
1401
+
 
 
 
 
 
 
 
 
1402
  alignments = get_alignments_3D(uniprotID, 'nan', pdbSequence, source, chain, pdbID, mode,
1403
  file_format='gzip')
1404
  mutPos = data.at[i, 'mutationPositionOnPDB']