Spaces:

ncats
/

EpiPipeline4RD

Running

File size: 4,108 Bytes

d7784f0
 
ddff90b
 
7ce5b82
ddff90b
7ce5b82
ddff90b
 
0416a61
f2852e3
 
0416a61
 
 
 
 
f2852e3
1656abd
0416a61
1656abd
 
 
 
 
 
 
 
f2852e3
1656abd
ddff90b
1656abd
 
 
 
 
 
 
 
f2852e3
0416a61
f2852e3
 
 
 
0416a61
 
f2852e3
0416a61
f2852e3
0416a61
ddff90b
f2852e3
 
 
 
 
 
 
 
 
7ead1f4
f2852e3
 
 
7ead1f4
 
f2852e3
 
 
7ead1f4
 
f2852e3
 
 
 
7ead1f4
ddff90b
 
0416a61
 
 
 
 
3ed57d7
1656abd
 
f2852e3

import nltk
nltk.download('stopwords')
import pandas as pd
#classify_abs is a dependency for extract_abs
import classify_abs
import extract_abs
#pd.set_option('display.max_colwidth', None)
import streamlit as st

########## Title for the Web App ##########
# Main page heading, followed by the attribution line shown directly beneath it.
APP_TITLE = "Epidemiology Extraction Pipeline for Rare Diseases"
APP_BYLINE = "by the National Center for Advancing Translational Sciences (NIH/NCATS)"

st.title(APP_TITLE)
st.subheader(APP_BYLINE)
# Signatures for reference: st.header(body, anchor=None), st.subheader(body, anchor=None).
# `anchor` is used for the URL and can be a custom str.

# https://docs.streamlit.io/library/api-reference/text/st.markdown
# NOTE: Disabled two-column layout, kept for reference only.
# It is commented out with `#` instead of being wrapped in a bare triple-quoted
# string: Streamlit's "magic" writes any standalone literal expression to the
# page, so a string used as a block comment would be rendered in the app.
#
# col1, col2 = st.columns(2)
#
# with col1:
#     st.header("Rare ")
#     disease_or_gard_id = st.text_input('Input a rare disease term or a GARD ID.', 'Fellman syndrome')
#
# with col2:
#     filtering = st.radio("What type of filtering would you like?",('Strict', 'Lenient', 'None'))
#     extract_diseases = st.checkbox("Extract Rare Diseases", value=False)
#     #max_results is Maximum number of PubMed ID's to retrieve BEFORE filtering
#     max_results = st.number_input("Maximum number of articles to find in PubMed", min_value=1, max_value=None, value=50)
#     # https://docs.streamlit.io/library/api-reference/widgets/st.number_input
#
# with col1:
#     with st.spinner('Loading Epidemiology Models and Dependencies...'):
#         classify_model_vars = classify_abs.init_classify_model()
#         st.success('Epidemiology Classification Model Loaded!')
#         NER_pipeline, entity_classes = extract_abs.init_NER_pipeline()
#         st.success('Epidemiology Extraction Model Loaded!')
#         GARD_dict, max_length = extract_abs.load_GARD_diseases()
#     st.success('All Models and Dependencies Loaded!')

########## Sidebar search options ##########
# max_results is the maximum number of PubMed IDs to retrieve BEFORE filtering.
# https://docs.streamlit.io/library/api-reference/widgets/st.number_input
# (This assignment must sit at column 0: it is a top-level statement, and any
# leading indentation here is an IndentationError that stops the whole script.)
max_results = st.sidebar.number_input(
    "Maximum number of articles to find in PubMed",
    min_value=1,
    max_value=None,
    value=50,
)

# st.radio(label, options, index=0, format_func=special_internal_function, key=None, help=None, on_change=None, args=None, kwargs=None, *, disabled=False)
# https://docs.streamlit.io/library/api-reference/widgets/st.radio
# With the default index=0 the first option ('Strict') is preselected.
filtering = st.sidebar.radio(
    "What type of filtering would you like?",
    ('Strict', 'Lenient', 'None'),
)

# https://docs.streamlit.io/library/api-reference/widgets/st.checkbox
extract_diseases = st.sidebar.checkbox("Extract Rare Diseases", value=False)

# Load everything the pipeline needs while a single spinner is displayed.
# A success banner is emitted after each of the first two loaders returns, and a
# final banner once the spinner context has exited.
with st.spinner('Loading Epidemiology Models and Dependencies...'):
    # Classifier state, handed to the extraction call as-is.
    classify_model_vars = classify_abs.init_classify_model()
    st.success('Epidemiology Classification Model Loaded!')

    # NER pipeline together with its entity classes.
    ner_bundle = extract_abs.init_NER_pipeline()
    NER_pipeline, entity_classes = ner_bundle
    st.success('Epidemiology Extraction Model Loaded!')

    # GARD disease dictionary and its accompanying max_length value.
    gard_bundle = extract_abs.load_GARD_diseases()
    GARD_dict, max_length = gard_bundle

st.success('All Models and Dependencies Loaded!')

# NOTE: Disabled alternative that shows one spinner per resource, kept for
# reference only. It is commented out with `#` instead of being wrapped in a
# bare triple-quoted string: Streamlit's "magic" writes any standalone literal
# expression to the page, so a string used as a block comment would be rendered
# in the app.
#
# #LSTM RNN Epi Classifier Model
# with st.spinner('Loading Epidemiology Classification Model...'):
#     classify_model_vars = classify_abs.init_classify_model()
# st.success('Epidemiology Classification Model Loaded!')
#
# #GARD Dictionary - For filtering and exact match disease/GARD ID identification
# with st.spinner('Loading GARD Rare Disease Dictionary...'):
#     GARD_dict, max_length = extract_abs.load_GARD_diseases()
# st.success('GARD Rare Disease Dictionary Loaded!')
#
# #BioBERT-based NER pipeline, open `entities` to see
# with st.spinner('Loading Epidemiology Extraction Model...'):
#     NER_pipeline, entity_classes = extract_abs.init_NER_pipeline()
# st.success('Epidemiology Extraction Model Loaded!')

########## Search input and extraction ##########
# Free-text query: a rare disease term or a GARD ID. No default value is given,
# so the field starts empty and the pipeline below does not run until the user
# has entered something. Surrounding whitespace is removed so that a
# blank-looking entry does not trigger a search.
disease_or_gard_id = st.text_input('Input a rare disease term or a GARD ID.').strip()

# filtering options are 'strict', 'lenient' (default), 'none'
# NOTE(review): the sidebar radio supplies the capitalized labels
# 'Strict' / 'Lenient' / 'None' -- confirm search_term_extraction accepts them.
if disease_or_gard_id:
    df = extract_abs.search_term_extraction(
        disease_or_gard_id,
        max_results,
        filtering,
        NER_pipeline,
        entity_classes,
        extract_diseases,
        GARD_dict,
        max_length,
        classify_model_vars,
    )
    # Show the extraction results, then celebrate completion.
    st.dataframe(df)
    st.balloons()
    # st.dataframe(data=None, width=None, height=None)

# st.code(body, language="python")