# NOTE(review): removed Hugging Face Spaces page residue (status lines,
# file size, commit hash, and line-number gutter) that was captured by the
# scrape — it is not part of the source file.
import sys
import os
import streamlit as st
from PIL import Image
import pandas as pd
from transformers import pipeline
import spacy
import en_core_web_lg
# Make the repository root importable: this script lives in a subdirectory,
# while the shared `helpers` module lives one level above it.
current = os.path.dirname(os.path.realpath(__file__))
parent = os.path.dirname(current)
sys.path.append(parent)
from helpers import display_nli_pr_results_as_list, prompt_to_nli, get_who_what_whom_qa
@st.cache(allow_output_mutation=True)
def load_spacy_pipeline():
    """Load the large English spaCy model, cached across Streamlit reruns."""
    spacy_nlp = en_core_web_lg.load()
    return spacy_nlp
def choose_text_menu(text=''):
    """Render the event-description text area and return its contents.

    Parameters
    ----------
    text : str, optional
        Unused; kept (now with a default) for backward compatibility with
        existing callers that pass a placeholder argument. The widget's
        value actually comes from ``st.session_state.text``.

    Returns
    -------
    str
        The event description currently entered by the user.
    """
    # Seed session state on the first run so the widget has a default value.
    if 'text' not in st.session_state:
        st.session_state.text = 'Several demonstrators were injured.'
    text = st.text_area('Event description', st.session_state.text)
    return text
# --- Model loaders, cached so Streamlit reruns do not reload weights ---
@st.cache(allow_output_mutation=True)
def load_model_prompting():
    """Return the DistilBERT fill-mask pipeline used for prompting."""
    fill_mask_pipeline = pipeline("fill-mask", model="distilbert-base-uncased")
    return fill_mask_pipeline
@st.cache(allow_output_mutation=True)
def load_model_nli():
    """Return the RoBERTa-large-MNLI classification pipeline (entailment scorer).

    The original code requested the ``"sentiment-analysis"`` task, which in
    transformers is simply an alias of ``"text-classification"``; the explicit
    task name is used here because the model is an NLI classifier, not a
    sentiment model. Behavior is identical.
    """
    return pipeline(task="text-classification", model="roberta-large-mnli")
@st.cache(allow_output_mutation=True)
def load_model_qa():
    """Return the extractive question-answering pipeline (RoBERTa SQuAD2)."""
    qa_model_name = "deepset/roberta-base-squad2"
    return pipeline(model=qa_model_name, tokenizer=qa_model_name, task="question-answering")
# Instantiate the spaCy model once at import time; reused by the coding run below.
nlp = load_spacy_pipeline()
### App START
st.markdown("### 1. Actor-target coding (experimental):")
@st.cache()
def load_qa_image():
    """Load the actor-target coding flow diagram, cached across reruns."""
    flow_diagram = Image.open('pipeline_qa.png')
    return flow_diagram
st.image(load_qa_image(),caption="""Actor-target Coding Flow. First we get the entailed answer candidates through the PR-ENT pipeline.
Then we construct questions based on these tokens to extract actors and targets, 2 questions per verb.
Finally, we pass these questions and event description to a pre-trained extractive question answering model and fill a table of [Actor, Action, Target].""")
st.markdown("""
Here we use an extractive question answering model to find the actor and target of an event.
As this is still in experimental phase, there are some limitations:
- The only template possible is `People were [Z].`, this allows us to get a verb to construct the two questions:
- `Who was [Z]?` to find the target.
- `Who [Z] people?` to find the actor.
- `top_k = 10` and `entailment_threshold = 0.5`.
The results of the QA are given along the confidence score of the model in brackets `[xx.x%]`
""")
## Load Models
# Trigger the cached loaders once at startup so the button callback is fast.
model_nli = load_model_nli()
model_prompting = load_model_prompting()
model_qa = load_model_qa()
st.markdown("""
### 2. Write an event description:
The first step is to write an event description that will be fed to the pipeline. This can be any text in English.
""")
# Render the text area and persist the entered description in session state.
text = choose_text_menu('')
st.session_state.text = text
st.markdown("""
### 3. Run actor-target coding:
""")
# Remember which text the displayed results were computed from, so the
# output section below stays consistent across Streamlit reruns.
if "old_text_qa" not in st.session_state:
    st.session_state.old_text_qa =st.session_state.text
qa_button = st.button("Run actor-target coding")
if qa_button:
    computation_state_qa = st.text("Computation Running.")
    st.session_state.old_text_qa =st.session_state.text
    # Fixed template (see limitations above); {} is the [Z] verb slot.
    prompt = "People were {}."
    # PR-ENT step: prompt the masked LM and keep candidate tokens entailed by
    # the event description (top_k=10, entailment_threshold=0.5).
    results = prompt_to_nli(text, prompt, model_prompting, model_nli, nlp, 10, 0.5, True)
    # Format each candidate as "token xx%" for display.
    # NOTE(review): assumes each result is ((token, ...), (_, score)) — verify against helpers.
    list_results = [x[0][0] + ' ' + str(int(x[1][1]*100)) + '%' for x in results]
    st.session_state.list_results_prompt_qa = list_results
    list_tokens = [x[0][0] for x in results]
    # QA step: build "Who was [Z]?" / "Who [Z] people?" questions from the
    # entailed tokens and run the extractive QA model.
    who_what_whom = get_who_what_whom_qa(text, list_tokens, model_qa)
    st.session_state.who_what_whom = who_what_whom
    computation_state_qa.text("Computation Done.")
# Display the last computed results; stored in session state so they
# survive Streamlit reruns triggered by other widgets.
if 'who_what_whom' in st.session_state:
    st.write('**Event Description**: {}'.format(st.session_state.old_text_qa))
    st.write('**Template**: "{}"; **Top K**: {}; **Entailment Threshold**: {}.'.format("People were [Z]",10, 0.5))
    # Entailed tokens with their entailment scores.
    display_nli_pr_results_as_list('', st.session_state.list_results_prompt_qa)
    # [Actor, Action, Target] table produced by the QA step.
    st.write(pd.DataFrame(st.session_state.who_what_whom))