Spaces:
Running
Running
File size: 4,108 Bytes
d7784f0 ddff90b 7ce5b82 ddff90b 7ce5b82 ddff90b 0416a61 f2852e3 0416a61 f2852e3 1656abd 0416a61 1656abd f2852e3 1656abd ddff90b 1656abd f2852e3 0416a61 f2852e3 0416a61 f2852e3 0416a61 f2852e3 0416a61 ddff90b f2852e3 7ead1f4 f2852e3 7ead1f4 f2852e3 7ead1f4 f2852e3 7ead1f4 ddff90b 0416a61 3ed57d7 1656abd f2852e3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 |
# Third-party and project imports for the Streamlit app.
import nltk
# Fetch the NLTK 'stopwords' corpus; this call runs before the project
# modules below are imported.
# NOTE(review): presumably classify_abs / extract_abs rely on this corpus
# being present — confirm before reordering these lines.
nltk.download('stopwords')
import pandas as pd
#classify_abs is a dependency for extract_abs
import classify_abs
import extract_abs
#pd.set_option('display.max_colwidth', None)
import streamlit as st
########## Title for the Web App ##########
# Heading and byline: the first elements this script writes to the page.
APP_TITLE = "Epidemiology Extraction Pipeline for Rare Diseases"
APP_BYLINE = "by the National Center for Advancing Translational Sciences (NIH/NCATS)"
st.title(APP_TITLE)
st.subheader(APP_BYLINE)
# Reference signatures:
#   st.header(body, anchor=None)
#   st.subheader(body, anchor=None)
# `anchor` is used for the URL and can be a custom str.
# https://docs.streamlit.io/library/api-reference/text/st.markdown
# Earlier two-column layout, kept for reference only.
# It is deliberately written as `#` comments instead of a bare triple-quoted
# string: with Streamlit "magic" enabled, a standalone string literal in the
# middle of the script is written to the page, so the disabled code would be
# shown to users.
#
# col1, col2 = st.columns(2)
# with col1:
#     st.header("Rare ")
#     disease_or_gard_id = st.text_input('Input a rare disease term or a GARD ID.', 'Fellman syndrome')
# with col2:
#     filtering = st.radio("What type of filtering would you like?",('Strict', 'Lenient', 'None'))
#     extract_diseases = st.checkbox("Extract Rare Diseases", value=False)
#     #max_results is Maximum number of PubMed ID's to retrieve BEFORE filtering
#     max_results = st.number_input("Maximum number of articles to find in PubMed", min_value=1, max_value=None, value=50)
#     # https://docs.streamlit.io/library/api-reference/widgets/st.number_input
# with col1:
#     with st.spinner('Loading Epidemiology Models and Dependencies...'):
#         classify_model_vars = classify_abs.init_classify_model()
#         st.success('Epidemiology Classification Model Loaded!')
#         NER_pipeline, entity_classes = extract_abs.init_NER_pipeline()
#         st.success('Epidemiology Extraction Model Loaded!')
#         GARD_dict, max_length = extract_abs.load_GARD_diseases()
#         st.success('All Models and Dependencies Loaded!')
# ---- Sidebar controls --------------------------------------------------
# Widget API references:
#   https://docs.streamlit.io/library/api-reference/widgets/st.number_input
#   https://docs.streamlit.io/library/api-reference/widgets/st.radio
#   https://docs.streamlit.io/library/api-reference/widgets/st.checkbox
# st.radio(label, options, index=0, format_func=special_internal_function, key=None, help=None, on_change=None, args=None, kwargs=None, *, disabled=False)
FILTER_CHOICES = ('Strict', 'Lenient', 'None')
sidebar = st.sidebar

# max_results: maximum number of PubMed IDs to retrieve BEFORE filtering.
max_results = sidebar.number_input(
    label="Maximum number of articles to find in PubMed",
    min_value=1,
    max_value=None,
    value=50,
)

# filtering: one of the FILTER_CHOICES labels (the first is selected by default).
filtering = sidebar.radio(
    label="What type of filtering would you like?",
    options=FILTER_CHOICES,
)

# extract_diseases: unchecked by default.
extract_diseases = sidebar.checkbox(label="Extract Rare Diseases", value=False)
# Load the classifier, the NER pipeline and the GARD disease data while a
# single spinner is displayed; a success banner follows each step.
# NOTE(review): Streamlit reruns the script on every widget interaction, so
# these loaders are called again on each rerun — consider Streamlit's
# caching decorators if start-up cost becomes a problem.
with st.spinner('Loading Epidemiology Models and Dependencies...'):
    # Epidemiology classification model.
    classify_model_vars = classify_abs.init_classify_model()
    st.success('Epidemiology Classification Model Loaded!')

    # NER pipeline and its entity classes.
    ner_bundle = extract_abs.init_NER_pipeline()
    NER_pipeline, entity_classes = ner_bundle
    st.success('Epidemiology Extraction Model Loaded!')

    # GARD disease dictionary plus its max_length value.
    gard_bundle = extract_abs.load_GARD_diseases()
    GARD_dict, max_length = gard_bundle
    st.success('All Models and Dependencies Loaded!')
# Alternative loading sequence with one spinner per resource, kept for
# reference only. Written as `#` comments instead of a bare triple-quoted
# string because Streamlit "magic" writes standalone string literals to the
# page, which would display this disabled code to users.
#
# #LSTM RNN Epi Classifier Model
# with st.spinner('Loading Epidemiology Classification Model...'):
#     classify_model_vars = classify_abs.init_classify_model()
#     st.success('Epidemiology Classification Model Loaded!')
# #GARD Dictionary - For filtering and exact match disease/GARD ID identification
# with st.spinner('Loading GARD Rare Disease Dictionary...'):
#     GARD_dict, max_length = extract_abs.load_GARD_diseases()
#     st.success('GARD Rare Disease Dictionary Loaded!')
# #BioBERT-based NER pipeline, open `entities` to see
# with st.spinner('Loading Epidemiology Extraction Model...'):
#     NER_pipeline, entity_classes = extract_abs.init_NER_pipeline()
#     st.success('Epidemiology Extraction Model Loaded!')
# Search box for the query term (a rare disease name or a GARD ID).
# The previous guard tested an undefined name (`text`), and the only
# assignment of `disease_or_gard_id` lived inside a disabled string block,
# so the script raised NameError before any search could run.
# Surrounding whitespace is stripped so a blank-looking entry does not
# trigger a search.
disease_or_gard_id = st.text_input(
    'Input a rare disease term or a GARD ID.', 'Fellman syndrome'
).strip()

#filtering options are 'strict','lenient'(default), 'none'
# NOTE(review): the sidebar radio yields capitalised labels
# ('Strict', 'Lenient', 'None') — confirm that search_term_extraction
# accepts them in that case.
if disease_or_gard_id:
    # Run the search/extraction with the loaded models and sidebar settings,
    # then show the result table.
    df = extract_abs.search_term_extraction(
        disease_or_gard_id, max_results, filtering,
        NER_pipeline, entity_classes,
        extract_diseases, GARD_dict, max_length,
        classify_model_vars,
    )
    st.dataframe(df)
    st.balloons()
#st.dataframe(data=None, width=None, height=None)
# st.code(body, language="python")