import streamlit as st
import pandas as pd 
from os import path
import sys
import streamlit.components.v1 as components
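# Make the local feature-vector modules (pdb_featureVector, alphafold_featureVector) importable.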
sys.path.append('code/')
#sys.path.append('ASCARIS/code/') 
import pdb_featureVector
import alphafold_featureVector
from st_aggrid import AgGrid, GridOptionsBuilder, JsCode, GridUpdateMode
import base64

# NOTE: this flag only takes effect when set through the Streamlit config;
# assigning a bare module-level variable here has no runtime effect.
showWarningOnDirectExecution = False


from datasets import Dataset, concatenate_datasets
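# `custom_example_dict_streamer` is used below but is not defined in this file.
# The sketch that follows is a hypothetical stand-in (assumed behavior: yield one
# {"col1", "col2"} example dict per structure file in the given directory);
# replace it with the project's real streamer.
from os import listdir

def custom_example_dict_streamer(structure_dir):
    # Hypothetical implementation: one record per file in structure_dir.
    for fname in sorted(listdir(structure_dir)):
        yield {"col1": fname, "col2": path.join(structure_dir, fname)}
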
MAX_SAMPLES_IN_MEMORY = 1000
samples_in_dset = 0
dset = Dataset.from_dict({"col1": [], "col2": []})  # empty dataset
path_to_save_dir = "HUBioData/input_files"
num_chunks = 0
# Accumulate streamed examples and flush them to disk in fixed-size chunks,
# so memory usage stays bounded by MAX_SAMPLES_IN_MEMORY regardless of input size.
for example_dict in custom_example_dict_streamer("HUBioData/AlphafoldStructures"):
    dset = dset.add_item(example_dict)
    samples_in_dset += 1
    if samples_in_dset == MAX_SAMPLES_IN_MEMORY:
        samples_in_dset = 0
        dset.save_to_disk(f"{path_to_save_dir}{num_chunks}")
        num_chunks += 1
        dset = Dataset.from_dict({"col1": [], "col2": []})  # start a new empty chunk
# Flush any remaining samples that did not fill a complete chunk.
if samples_in_dset > 0:
    dset.save_to_disk(f"{path_to_save_dir}{num_chunks}")
    num_chunks += 1
# Reload the chunks memory-mapped from disk and concatenate them.
loaded_dsets = []
for chunk_num in range(num_chunks):
    dset = Dataset.load_from_disk(f"{path_to_save_dir}{chunk_num}")
    loaded_dsets.append(dset)
final_dset = concatenate_datasets(loaded_dsets)
st.write('FINAL DSET')
st.write(final_dset)
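# Note: st.write falls back to the object's repr for a datasets.Dataset; to show
# an interactive table instead, st.dataframe(final_dset.to_pandas()) would work,
# at the cost of loading the full dataset into memory.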





def convert_df(df):
    # Encode the dataframe as UTF-8 CSV bytes for st.download_button.
    return df.to_csv(index=False).encode('utf-8')
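# If the deployed Streamlit version supports it (an assumption about the runtime,
# not confirmed here), the conversion could be cached so reruns do not recompute
# the CSV:
#
#     @st.cache_data
#     def convert_df(df):
#         return df.to_csv(index=False).encode('utf-8')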

    
# Check if 'key' already exists in session_state
# If not, then initialize it
if 'visibility' not in st.session_state:
    st.session_state['visibility'] = 'visible'
    st.session_state.disabled =  False

title_html = '<p style="font-family:Trebuchet MS; color:#FD7456; font-size: 25px; font-weight:bold; text-align:center">ASCARIS</p>'
st.markdown(title_html, unsafe_allow_html=True)
subtitle_html = '<p style="font-family:Trebuchet MS; color:#FD7456; font-size: 25px; font-weight:bold; text-align:center">(Annotation and StruCture-bAsed RepresentatIon of Single amino acid variations)</p>'
st.markdown(subtitle_html, unsafe_allow_html=True)
 
st.write('')
st.write('')
st.write('')
st.write('')

with st.form('mform', clear_on_submit=False):
    # Input options mirror the CLI version of ASCARIS:
    #   source: 1 = PDB/SwissModel/Modbase structures (default), 2 = AlphaFold structures
    #   impute: whether the resulting feature vector is imputed (default: True)
    #   input:  comma-separated SAVs, each as UniProt ID-wt residue-position-mutated residue
    source = st.selectbox('Select the protein structure resource (1: PDB-SwissModel-Modbase, 2: AlphaFold)', [1, 2])
    impute = st.selectbox('Imputation', [True, False])
    input_data = st.text_input('Enter SAV data points (Example: Q00889-H-85-D, or Q00889-H-85-D,Q16363-Y-498-H)')

    input_set = input_data
    mode = source
    submitted = st.form_submit_button("Submit")
    
selected_df = pd.DataFrame()
st.write('The online tool may be slow, especially while processing multiple SAVs; please consider using the local programmatic version at https://github.com/HUBioDataLab/ASCARIS/')
if submitted:
    print('*****************************************')
    print('Feature vector generation is in progress. \nPlease check the log file for updates.')
    print('*****************************************')
    with st.spinner('In progress... This may take a while...'):
        try:
            if mode == 1:
                selected_df = pdb_featureVector.pdb(input_set, mode, impute)
            elif mode == 2:
                selected_df = alphafold_featureVector.alphafold(input_set, mode, impute)
            else:
                selected_df = pd.DataFrame()
        except Exception:
            # Fall back to an empty dataframe so the failure branch below runs.
            selected_df = pd.DataFrame()

    if selected_df is None:
        st.error('Feature vector generation failed. Check the log file.')
    else:
        if len(selected_df) != 0:
            st.write(selected_df)
            st.success('Feature vector successfully created.')
            csv = convert_df(selected_df)

            st.download_button("Press to Download the Feature Vector", csv, f"ASCARIS_SAV_rep_{input_set}.csv", "text/csv", key='download-csv')
        else:
            st.error('Feature vector generation failed. Check the log file.')