Spaces:

ncats
/

EpiPipeline4RD

Running

File size: 3,330 Bytes

d7784f0
 
ddff90b
 
7ce5b82
ddff90b
7ce5b82
ddff90b
 
0416a61
1656abd
0416a61
 
 
 
 
 
 
1656abd
0416a61
1656abd
 
 
 
 
 
 
 
 
 
ddff90b
1656abd
 
 
 
 
 
 
 
0416a61
 
 
1656abd
0416a61
1656abd
0416a61
ddff90b
7ead1f4
1656abd
 
 
7ead1f4
 
1656abd
 
 
7ead1f4
 
1656abd
 
 
7ead1f4
 
ddff90b
 
0416a61
 
 
 
 
1656abd

import nltk
nltk.download('stopwords')
import pandas as pd
#classify_abs is a dependency for extract_abs
import classify_abs
import extract_abs
#pd.set_option('display.max_colwidth', None)
import streamlit as st

########## Title for the Web App ##########
# Page heading rendered at the top of the app.
st.title(
    "Epidemiology Extraction Pipeline for Rare Diseases "
    "by the National Center for Advancing Translational Sciences (NIH/NCATS)"
)

#st.header(body, anchor=None)
#st.subheader(body, anchor=None) 
#Anchor is for the URL, can be custom str

# https://docs.streamlit.io/library/api-reference/text/st.markdown

# Lay the page out as two side-by-side columns: the search term goes in the
# left column, the search options in the right one.
col1, col2 = st.columns(2)

with col1:
    # NOTE(review): the header text "Rare " looks truncated -- confirm the
    # intended wording before changing it.
    st.header("Rare ")
    # Free-text search term; the second argument is the pre-filled default.
    disease_or_gard_id = st.text_input(
        'Input a rare disease term or a GARD ID.', 'Fellman syndrome')

with col2:
    filtering = st.radio("What type of filtering would you like?",
                         ('Strict', 'Lenient', 'None'))
    extract_diseases = st.checkbox("Extract Rare Diseases", value=False)
    #max_results is Maximum number of PubMed ID's to retrieve BEFORE filtering
    # The widget label used to be the undefined name `label`, which raised a
    # NameError on start-up; it is now an explicit string.
    # Called through `st.sidebar`, so this widget is placed in the sidebar
    # even though the call sits inside the `with col2:` block.
    max_results = st.sidebar.number_input(
        "Maximum number of PubMed IDs to retrieve BEFORE filtering",
        min_value=1, max_value=None, value=50)
    # https://docs.streamlit.io/library/api-reference/widgets/st.number_input

# Load everything the extraction call below needs, reporting progress in the
# left column. This runs at module top level with no caching visible here.
# NOTE(review): Streamlit re-executes the script on widget interaction, so these
# loads presumably repeat on every rerun -- consider caching; confirm against
# the Streamlit version in use.
with col1:
    # The spinner is displayed while the three loader calls in its body run.
    with st.spinner('Loading Epidemiology Models and Dependencies...'):
        # Opaque bundle returned by the classifier initialiser; it is passed
        # through unchanged to extract_abs.search_term_extraction.
        classify_model_vars = classify_abs.init_classify_model()
        st.success('Epidemiology Classification Model Loaded!')
        # NER pipeline object plus its entity classes (exact types are defined
        # in extract_abs, not visible here).
        NER_pipeline, entity_classes = extract_abs.init_NER_pipeline()
        st.success('Epidemiology Extraction Model Loaded!')
        # GARD disease dictionary and an associated max_length value.
        # NOTE(review): presumably the longest disease-name length -- verify
        # in extract_abs.load_GARD_diseases.
        GARD_dict, max_length = extract_abs.load_GARD_diseases()
    # Emitted after the spinner block has exited, i.e. once all loads finished.
    st.success('All Models and Dependencies Loaded!')

# st.radio(label, options, index=0, format_func=special_internal_function, key=None, help=None, on_change=None, args=None, kwargs=None, *, disabled=False)
# https://docs.streamlit.io/library/api-reference/widgets/st.radio
#filtering = st.sidebar.radio("What type of filtering would you like?",('Strict', 'Lenient', 'None'))

#extract_diseases = st.sidebar.checkbox("Extract Rare Diseases", value=False)
# https://docs.streamlit.io/library/api-reference/widgets/st.checkbox

#LSTM RNN Epi Classifier Model
#with st.spinner('Loading Epidemiology Classification Model...'):
#    classify_model_vars = classify_abs.init_classify_model()
#st.success('Epidemiology Classification Model Loaded!')

#GARD Dictionary - For filtering and exact match disease/GARD ID identification
#with st.spinner('Loading GARD Rare Disease Dictionary...'):
#    GARD_dict, max_length = extract_abs.load_GARD_diseases()
#st.success('GARD Rare Disease Dictionary Loaded!')

#BioBERT-based NER pipeline, open `entities` to see 
#with st.spinner('Loading Epidemiology Extraction Model...'):
#    NER_pipeline, entity_classes = extract_abs.init_NER_pipeline()
#st.success('Epidemiology Extraction Model Loaded!')


# Run the search + classification + extraction pipeline for the entered term
# and show the resulting table.
#
# The guard used to test the undefined name `text` (NameError at runtime); the
# value that should gate the search is the text-input result. Surrounding
# whitespace is stripped so a blank or whitespace-only entry does not trigger
# a search.
search_term = disease_or_gard_id.strip()
if search_term:
    #filtering options are 'strict','lenient'(default), 'none'
    # The radio widget yields the capitalised display labels, so the value is
    # lower-cased to match the options documented above.
    # NOTE(review): confirm extract_abs.search_term_extraction expects the
    # lowercase spellings.
    df = extract_abs.search_term_extraction(
        search_term, max_results, filtering.lower(),
        NER_pipeline, entity_classes,
        extract_diseases, GARD_dict, max_length,
        classify_model_vars)
    st.dataframe(df)
    #st.dataframe(data=None, width=None, height=None)
  
# st.code(body, language="python")