Spaces:

HUBioDataLab
/

ASCARIS

Running

App Files Files Community

fatmacankara committed on Sep 7, 2023

Commit

f9741db

1 Parent(s): 55f11f5

Update code/pdb_featureVector.py

Browse files

Files changed (1) hide show

code/pdb_featureVector.py +8 -7

code/pdb_featureVector.py CHANGED Viewed

@@ -95,7 +95,7 @@ def pdb(input_set, mode, impute):
         data.domStart = data.domStart.replace({'nan': '-1'})
         data.domEnd = data.domEnd.replace({'nan': '-1'})
         data.distance = data.distance.replace({'nan': '-1'})
         """
         STEP 4
         Retrieve canonical and isoform UniProt sequences.
@@ -197,6 +197,7 @@ def pdb(input_set, mode, impute):
         else:
             pdbs = []
         print('Processing PDB structures...\n')
         if pdbs == []:
             print('No PDB structure found for the query. ')
         print('Starting PDB structures download...\n')
@@ -297,7 +298,7 @@ def pdb(input_set, mode, impute):
                     filename.rename(filename_replace_ext.with_suffix('.pdb'))
             except:
                 FileNotFoundError
         uniprot_matched = pd.merge(uniprot_matched, pdb_info, on='uniprotID', how='left')
         uniprot_matched = uniprot_matched.astype(str)
         uniprot_matched = uniprot_matched.drop_duplicates()
@@ -402,7 +403,7 @@ def pdb(input_set, mode, impute):
         dfNM = dfNM.sort_values(['uniprotID', 'resolution'], axis=0, ascending=True)
         dfNM = dfNM.drop_duplicates(['uniprotID', 'wt', 'mut', 'pos', 'pdbSequence'], keep='first')
         dfNM.rename(columns={'isoformSequence': 'uniprotSequence'}, inplace=True)
         dfM = dfM.astype(str)
         dfNM = dfNM.astype(str)
@@ -493,7 +494,7 @@ def pdb(input_set, mode, impute):
         print('Proceeding to  SwissModel search...')
         print('------------------------------------\n')
         # At this point we have 4 dataframes
         # 1. after_up_pdb_alignment --- This is after PDB sequence alignment. There may be mutations for which no match was found after the alignment. They will be searched in other databases as well.
         # 1a. aligned --- we are done with this.
@@ -592,7 +593,7 @@ def pdb(input_set, mode, impute):
         with_swiss_models = pd.concat([to_swiss, no_swiss_models]).drop_duplicates(['datapoint'], keep=False)
         with_swiss_models = with_swiss_models[to_swiss.columns]
         # Add model info.
         with_swiss_models = with_swiss_models.astype(str)
@@ -698,7 +699,7 @@ def pdb(input_set, mode, impute):
             swissmodels_fasta = pd.DataFrame(columns=['uniprotID', 'template', 'qmean_norm', 'chain', 'fasta'])
         else:
             swissmodels_fasta.columns = ['uniprotID', 'template', 'qmean_norm', 'chain', 'fasta']
         swissmodels_fasta = swissmodels_fasta.astype(str)
         swiss_models_with_data.qmean_norm = swiss_models_with_data.qmean_norm.astype(float)
@@ -813,7 +814,7 @@ def pdb(input_set, mode, impute):
         to_swiss_columns = to_swiss.columns
         to_swiss_size = len(to_swiss.drop_duplicates(['datapoint']))
         to_swiss = None
         # CONTROL
         """

         data.domStart = data.domStart.replace({'nan': '-1'})
         data.domEnd = data.domEnd.replace({'nan': '-1'})
         data.distance = data.distance.replace({'nan': '-1'})
+        st.write('1')
         """
         STEP 4
         Retrieve canonical and isoform UniProt sequences.
         else:
             pdbs = []
         print('Processing PDB structures...\n')
+        st.write('2')
         if pdbs == []:
             print('No PDB structure found for the query. ')
         print('Starting PDB structures download...\n')
                     filename.rename(filename_replace_ext.with_suffix('.pdb'))
             except:
                 FileNotFoundError
+        st.write('3')
         uniprot_matched = pd.merge(uniprot_matched, pdb_info, on='uniprotID', how='left')
         uniprot_matched = uniprot_matched.astype(str)
         uniprot_matched = uniprot_matched.drop_duplicates()
         dfNM = dfNM.sort_values(['uniprotID', 'resolution'], axis=0, ascending=True)
         dfNM = dfNM.drop_duplicates(['uniprotID', 'wt', 'mut', 'pos', 'pdbSequence'], keep='first')
         dfNM.rename(columns={'isoformSequence': 'uniprotSequence'}, inplace=True)
+        st.write('4')
         dfM = dfM.astype(str)
         dfNM = dfNM.astype(str)
         print('Proceeding to  SwissModel search...')
         print('------------------------------------\n')
+        st.write('5')
         # At this point we have 4 dataframes
         # 1. after_up_pdb_alignment --- This is after PDB sequence alignment. There may be mutations for which no match was found after the alignment. They will be searched in other databases as well.
         # 1a. aligned --- we are done with this.
         with_swiss_models = pd.concat([to_swiss, no_swiss_models]).drop_duplicates(['datapoint'], keep=False)
         with_swiss_models = with_swiss_models[to_swiss.columns]
+        st.write('6')
         # Add model info.
         with_swiss_models = with_swiss_models.astype(str)
             swissmodels_fasta = pd.DataFrame(columns=['uniprotID', 'template', 'qmean_norm', 'chain', 'fasta'])
         else:
             swissmodels_fasta.columns = ['uniprotID', 'template', 'qmean_norm', 'chain', 'fasta']
+        st.write('7')
         swissmodels_fasta = swissmodels_fasta.astype(str)
         swiss_models_with_data.qmean_norm = swiss_models_with_data.qmean_norm.astype(float)
         to_swiss_columns = to_swiss.columns
         to_swiss_size = len(to_swiss.drop_duplicates(['datapoint']))
         to_swiss = None
+        st.write('8')
         # CONTROL
         """