import streamlit as st
from transformers import pipeline
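
# Streamlit front end for three common NLP tasks, each backed by a
# Hugging Face `pipeline`: extractive question answering, text
# summarization, and text generation.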

st.set_page_config(page_title="Common NLP Tasks")
st.title("Common NLP Tasks")
st.subheader("Use the menu on the left to select a NLP task (click on > if closed).")

expander = st.sidebar.expander("About")
expander.write("This web app allows you to perform common Natural Language Processing tasks, select a task below to get started.")

st.sidebar.header("What will you like to do?")
option = st.sidebar.radio("", ["Extractive question answering", "Text summarization", "Text generation"])

# Build and cache the extractive question-answering pipeline so the model is
# loaded only once, not on every Streamlit rerun (st.cache is the legacy
# caching API; recent Streamlit releases use st.cache_resource instead).
@st.cache(show_spinner=False, allow_output_mutation=True)
def question_model():
    model_name = "deepset/roberta-base-squad2"
    question_answerer = pipeline(model=model_name, tokenizer=model_name, revision="v1.0", task="question-answering")
    return question_answerer

# Build and cache the PEGASUS summarization pipeline.
@st.cache(show_spinner=False, allow_output_mutation=True)
def summarization_model():
    model_name = "google/pegasus-xsum"
    summarizer = pipeline(model=model_name, tokenizer=model_name, task="summarization")
    return summarizer

# Build and cache the distilgpt2 text-generation pipeline.
@st.cache(show_spinner=False, allow_output_mutation=True)
def generation_model():
    model_name = "distilgpt2"
    generator = pipeline(model=model_name, tokenizer=model_name, task="text-generation")
    return generator
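
# The sidebar selection chooses one of the three UI branches below; the
# corresponding model is only loaded after the user presses the task's button.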

if option == "Extractive question answering":
    st.markdown("<h2 style='text-align: center; color:grey;'>Extract answer from text</h2>", unsafe_allow_html=True)
    source = st.radio("How would you like to start? Choose an option below", ["I want to input some text", "I want to upload a file"])
    sample_question = "What did the shepherd boy do to amuse himself?"
    if source == "I want to input some text":
        with open("sample.txt", "r") as text_file:
            sample_text = text_file.read()
        context = st.text_area("Use the example below or input your own text in English (10,000 characters max)", value=sample_text, max_chars=10000, height=330)
        question = st.text_input(label="Use the question below or enter your own question", value=sample_question)
        button = st.button("Get answer")
        if button:
            with st.spinner(text="Loading question model..."):
                question_answerer = question_model()
            with st.spinner(text="Getting answer..."):
                answer = question_answerer(context=context, question=question)
                answer = answer["answer"]
                html_str = f"<p style='color:red;'>{answer}</p>"
                st.markdown(html_str, unsafe_allow_html=True)
    elif source == "I want to upload a file":
        uploaded_file = st.file_uploader("Choose a .txt file to upload", type=["txt"])
        if uploaded_file is not None:
            raw_text = str(uploaded_file.read(), "utf-8")
            context = st.text_area("", value=raw_text, height=330)
            question = st.text_input(label="Enter your question", value=sample_question)
            button = st.button("Get answer")
            if button:
                with st.spinner(text="Loading summarization model..."):
                    question_answerer = question_model()
                with st.spinner(text="Getting answer..."):
                    answer = question_answerer(context=context, question=question)
                    answer = answer["answer"]
                    st.text(answer)

elif option == "Text summarization":
    st.markdown("<h2 style='text-align: center; color:grey;'>Summarize text</h2>", unsafe_allow_html=True)
    source = st.radio("How would you like to start? Choose an option below", ["I want to input some text", "I want to upload a file"])
    if source == "I want to input some text":
        with open("sample.txt", "r") as text_file:
            sample_text = text_file.read()
        text = st.text_area("Input a text in English (10,000 characters max) or use the example below", value=sample_text, max_chars=10000, height=330)
        button = st.button("Get summary")
        if button:
            with st.spinner(text="Loading summarization model..."):
                summarizer = summarization_model()
            with st.spinner(text="Summarizing text..."):
                summary = summarizer(text, max_length=130, min_length=30)
                st.write(summary[0]["summary_text"])

    elif source == "I want to upload a file":
        uploaded_file = st.file_uploader("Choose a .txt file to upload", type=["txt"])
        if uploaded_file is not None:
            raw_text = str(uploaded_file.read(), "utf-8")
            text = st.text_area("", value=raw_text, height=330)
            button = st.button("Get summary")
            if button:
                with st.spinner(text="Loading summarization model..."):
                    summarizer = summarization_model()
                with st.spinner(text="Summarizing text..."):
                    summary = summarizer(text, max_length=130, min_length=30)
                    st.write(summary[0]["summary_text"])
                
elif option == "Text generation":
    st.markdown("<h2 style='text-align: center; color:grey;'>Generate text</h2>", unsafe_allow_html=True)
    text = st.text_input(label="Enter one line of text and let the NLP model generate the rest for you")
    button = st.button("Generate text")
    if button:
        with st.spinner(text="Loading text generation model..."):
            generator = generation_model()
        with st.spinner(text="Generating text..."):
            generated_text = generator(text, max_length=50)
            st.write(generated_text[0]["generated_text"])
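
# To run locally (assuming this script is saved as app.py and that streamlit,
# transformers, and a backend such as PyTorch are installed):
#   streamlit run app.py
# The text-input examples expect a sample.txt file in the working directory.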