Spaces:
Sleeping
Sleeping
File size: 3,330 Bytes
d7784f0 ddff90b 7ce5b82 ddff90b 7ce5b82 ddff90b 0416a61 1656abd 0416a61 1656abd 0416a61 1656abd ddff90b 1656abd 0416a61 1656abd 0416a61 1656abd 0416a61 ddff90b 7ead1f4 1656abd 7ead1f4 1656abd 7ead1f4 1656abd 7ead1f4 ddff90b 0416a61 1656abd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 |
import nltk
# Fetch the NLTK 'stopwords' corpus at start-up, before the project modules are imported.
# NOTE(review): presumably needed by classify_abs / extract_abs — confirm which module uses it.
nltk.download('stopwords')
import pandas as pd
#classify_abs is a dependency for extract_abs
import classify_abs
import extract_abs
#pd.set_option('display.max_colwidth', None)
import streamlit as st
########## Title for the Web App ##########
st.title("Epidemiology Extraction Pipeline for Rare Diseases by the National Center for Advancing Translational Sciences (NIH/NCATS)")

# Related text APIs: st.header(body, anchor=None), st.subheader(body, anchor=None).
# The anchor is for the URL and can be a custom str.
# https://docs.streamlit.io/library/api-reference/text/st.markdown

# Two side-by-side columns: the search box goes in the first one, the
# filtering options in the second.
col1, col2 = st.columns(2)

with col1:
    # NOTE(review): the header text "Rare " looks truncated — confirm the intended wording.
    st.header("Rare ")
    disease_or_gard_id = st.text_input('Input a rare disease term or a GARD ID.', 'Fellman syndrome')

with col2:
    filtering = st.radio("What type of filtering would you like?", ('Strict', 'Lenient', 'None'))
    extract_diseases = st.checkbox("Extract Rare Diseases", value=False)

# max_results is the maximum number of PubMed IDs to retrieve BEFORE filtering.
# The first argument of number_input is the widget label; it must be a string
# (the previous code passed an undefined name here, which raised NameError).
# https://docs.streamlit.io/library/api-reference/widgets/st.number_input
max_results = st.sidebar.number_input(
    "Maximum number of PubMed IDs to retrieve BEFORE filtering",
    min_value=1,
    max_value=None,
    value=50,
)
# Load every model/dependency inside the first column while a spinner is shown,
# reporting each stage with a success banner as soon as it finishes.
with col1, st.spinner('Loading Epidemiology Models and Dependencies...'):
    # Stage 1: epidemiology classification model.
    classify_model_vars = classify_abs.init_classify_model()
    st.success('Epidemiology Classification Model Loaded!')

    # Stage 2: NER pipeline together with its entity classes.
    NER_pipeline, entity_classes = extract_abs.init_NER_pipeline()
    st.success('Epidemiology Extraction Model Loaded!')

    # Stage 3: GARD disease dictionary and its max_length value.
    GARD_dict, max_length = extract_abs.load_GARD_diseases()
    st.success('All Models and Dependencies Loaded!')
# st.radio(label, options, index=0, format_func=special_internal_function, key=None, help=None, on_change=None, args=None, kwargs=None, *, disabled=False)
# https://docs.streamlit.io/library/api-reference/widgets/st.radio
#filtering = st.sidebar.radio("What type of filtering would you like?",('Strict', 'Lenient', 'None'))
#extract_diseases = st.sidebar.checkbox("Extract Rare Diseases", value=False)
# https://docs.streamlit.io/library/api-reference/widgets/st.checkbox
#LSTM RNN Epi Classifier Model
#with st.spinner('Loading Epidemiology Classification Model...'):
# classify_model_vars = classify_abs.init_classify_model()
#st.success('Epidemiology Classification Model Loaded!')
#GARD Dictionary - For filtering and exact match disease/GARD ID identification
#with st.spinner('Loading GARD Rare Disease Dictionary...'):
# GARD_dict, max_length = extract_abs.load_GARD_diseases()
#st.success('GARD Rare Disease Dictionary Loaded!')
#BioBERT-based NER pipeline, open `entities` to see
#with st.spinner('Loading Epidemiology Extraction Model...'):
# NER_pipeline, entity_classes = extract_abs.init_NER_pipeline()
#st.success('Epidemiology Extraction Model Loaded!')
#filtering options are 'strict','lenient'(default), 'none'
# Run the extraction only when the search box holds a non-empty term.
# The guard tests the text-input value; the name tested previously was never
# defined in this script and raised NameError.
# NOTE(review): the radio widget yields 'Strict' / 'Lenient' / 'None' (capitalized);
# confirm that search_term_extraction accepts that casing for `filtering`.
if disease_or_gard_id:
    df = extract_abs.search_term_extraction(
        disease_or_gard_id, max_results, filtering,
        NER_pipeline, entity_classes,
        extract_diseases, GARD_dict, max_length,
        classify_model_vars,
    )
    # Render the returned result as an interactive table.
    st.dataframe(df)
#st.dataframe(data=None, width=None, height=None)
# st.code(body, language="python")
|