Spaces:

HUBioDataLab
/

ASCARIS

Running

App Files Community

fatmacankara committed on Aug 25, 2023

Commit

92d57b2

1 Parent(s): c975dff

Update code/pdb_featureVector.py

Browse files

Files changed (1) hide show

code/pdb_featureVector.py +4 -35

code/pdb_featureVector.py CHANGED Viewed

@@ -163,8 +163,7 @@ def pdb(input_set, mode, impute):
                             data.at[i, 'wt_sequence_match'] = 'i'
                             data.at[i, 'whichIsoform'] = whichIsoform
                             break
-        st.write('MATCHING UNIPTOR')
-        st.write(data)
         data.wt_sequence_match = data.wt_sequence_match.astype('str')
         data.replace({'': 'nan'}, inplace=True)
         data_size = len(data.drop_duplicates(['datapoint']))
@@ -288,12 +287,6 @@ def pdb(input_set, mode, impute):
                         pdb_info.at[index, 'chain'] = chain_id
                         pdb_info.at[index, 'resolution'] = resolution
                         index += 1
-        print()
-        st.write('PDB INFO')
-        st.write(pdb_info)
-        st.write('PDB FASTA')
-        st.write('pdb_info')
         print('PDB file processing finished..')
         for filename in list(Path(path_to_output_files / 'pdb_structures').glob("*")):
             try:
@@ -432,24 +425,12 @@ def pdb(input_set, mode, impute):
         existing_pdb = None
         with_pdb_size = len(with_pdb.drop_duplicates(['datapoint']))
         with_pdb = None
-        st.write('dfM')
-        st.write(dfM)
-        st.write('dfNM')
-        st.write(dfNM)
         print('Aligning sequences...\n')
         aligned_m = final_stage(dfM, annotation_list, Path(path_to_output_files / 'alignment_files'))
         aligned_nm = final_stage(dfNM, annotation_list, Path(path_to_output_files / 'alignment_files'))
-        st.write('aligned_m')
-        st.write(aligned_m.iloc[0])
-        st.write(aligned_m.iloc[1])
-        st.write(aligned_m.iloc[2])
-        st.write(aligned_m.iloc[3])
-        st.write(aligned_m.iloc[4])
-        st.write(aligned_m.iloc[5])
@@ -493,8 +474,7 @@ def pdb(input_set, mode, impute):
         yes_pdb_no_match = after_up_pdb_alignment[
             (after_up_pdb_alignment.pdbID != 'nan') & (after_up_pdb_alignment.mutationPositionOnPDB == 'nan')]
         no_pdb = no_pdb.copy()
-        st.write('-----PDB ALIGNED-----')
-        st.write(pdb_aligned)
         print('PDB matching is completed...\n')
         print('SUMMARY')
         print('-------')
@@ -895,7 +875,6 @@ def pdb(input_set, mode, impute):
                 if protein not in existing_modbase_models:
                     print('Downloading Modbase models for ', protein)
                     url = 'https://salilab.org/modbase/retrieve/modbase/?databaseID=' + protein
-                    print(url)
                     req = requests.get(url)
                     name = path_to_output_files / 'modbase_structures' / f'{protein}.txt'
                     with open(name, 'wb') as f:
@@ -1403,11 +1382,9 @@ def pdb(input_set, mode, impute):
         data['uniprotSequence'] = data['uniprotSequence'].str.replace('U', 'C')
         data['pdbSequence'] = data['pdbSequence'].str.replace('U', 'C')
         for i in data.index:
-            st.write('iiiii',i)
             id_ = data.at[i, 'pdbID'].lower()
             up_id_ = data.at[i, 'uniprotID']
             score_ = str(data.at[i, 'score'])
-            st.write(id_, up_id_, score_)
             """
             if data.at[i, 'source'] == 'PDB':
                 pdb_path = Path(path_to_output_files / 'pdb_structures' / f'{id_}.pdb')
@@ -1421,15 +1398,7 @@ def pdb(input_set, mode, impute):
             chain = data.at[i, 'chain']
             uniprotID = data.at[i, 'uniprotID']
             pdbID = data.at[i, 'pdbID']
-            st.write(pdbSequence)
-            st.write(source)
-            st.write(chain)
-            st.write(uniprotID)
-            st.write(pdbID)
-            st.write(uniprotID)
-            st.write('INputs')
-            st.write(uniprotID, pdbSequence, source, chain, pdbID, mode                                            )
             alignments = get_alignments_3D(uniprotID, 'nan', pdbSequence, source, chain, pdbID, mode,
                                             file_format='gzip')
             mutPos = data.at[i, 'mutationPositionOnPDB']

                             data.at[i, 'wt_sequence_match'] = 'i'
                             data.at[i, 'whichIsoform'] = whichIsoform
                             break
         data.wt_sequence_match = data.wt_sequence_match.astype('str')
         data.replace({'': 'nan'}, inplace=True)
         data_size = len(data.drop_duplicates(['datapoint']))
                         pdb_info.at[index, 'chain'] = chain_id
                         pdb_info.at[index, 'resolution'] = resolution
                         index += 1
         print('PDB file processing finished..')
         for filename in list(Path(path_to_output_files / 'pdb_structures').glob("*")):
             try:
         existing_pdb = None
         with_pdb_size = len(with_pdb.drop_duplicates(['datapoint']))
         with_pdb = None
         print('Aligning sequences...\n')
         aligned_m = final_stage(dfM, annotation_list, Path(path_to_output_files / 'alignment_files'))
         aligned_nm = final_stage(dfNM, annotation_list, Path(path_to_output_files / 'alignment_files'))
         yes_pdb_no_match = after_up_pdb_alignment[
             (after_up_pdb_alignment.pdbID != 'nan') & (after_up_pdb_alignment.mutationPositionOnPDB == 'nan')]
         no_pdb = no_pdb.copy()
         print('PDB matching is completed...\n')
         print('SUMMARY')
         print('-------')
                 if protein not in existing_modbase_models:
                     print('Downloading Modbase models for ', protein)
                     url = 'https://salilab.org/modbase/retrieve/modbase/?databaseID=' + protein
                     req = requests.get(url)
                     name = path_to_output_files / 'modbase_structures' / f'{protein}.txt'
                     with open(name, 'wb') as f:
         data['uniprotSequence'] = data['uniprotSequence'].str.replace('U', 'C')
         data['pdbSequence'] = data['pdbSequence'].str.replace('U', 'C')
         for i in data.index:
             id_ = data.at[i, 'pdbID'].lower()
             up_id_ = data.at[i, 'uniprotID']
             score_ = str(data.at[i, 'score'])
             """
             if data.at[i, 'source'] == 'PDB':
                 pdb_path = Path(path_to_output_files / 'pdb_structures' / f'{id_}.pdb')
             chain = data.at[i, 'chain']
             uniprotID = data.at[i, 'uniprotID']
             pdbID = data.at[i, 'pdbID']
             alignments = get_alignments_3D(uniprotID, 'nan', pdbSequence, source, chain, pdbID, mode,
                                             file_format='gzip')
             mutPos = data.at[i, 'mutationPositionOnPDB']