File size: 3,924 Bytes
8504fa5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import sys
import os

import streamlit as st

from PIL import Image
import pandas as pd

from transformers import pipeline
import spacy
import en_core_web_lg

current = os.path.dirname(os.path.realpath(__file__))
parent = os.path.dirname(current)
sys.path.append(parent)
from helpers import display_nli_pr_results_as_list, prompt_to_nli, get_who_what_whom_qa

@st.cache(allow_output_mutation=True)
def load_spacy_pipeline():
    """Return the large English spaCy model, cached across Streamlit reruns."""
    spacy_model = en_core_web_lg.load()
    return spacy_model

def choose_text_menu(text):
    """Render the event-description text area and return its contents.

    Parameters
    ----------
    text : str
        Optional initial description used to seed the widget the first time
        it is shown in this session. If falsy (the only caller in this file
        passes ''), a built-in example sentence is used instead. Previously
        this parameter was silently ignored; honoring it is backward
        compatible because '' still yields the original default.

    Returns
    -------
    str
        The text currently entered in the widget.
    """
    # Seed the backing session state exactly once so the text survives
    # Streamlit's rerun-on-interaction execution model.
    if 'text' not in st.session_state:
        st.session_state.text = text or 'Several demonstrators were injured.'
    return st.text_area('Event description', st.session_state.text)

# Cached loaders for the transformer pipelines used by the app.
@st.cache(allow_output_mutation=True)
def load_model_prompting():
    """Return a cached fill-mask pipeline backed by DistilBERT (uncased)."""
    fill_mask = pipeline("fill-mask", model="distilbert-base-uncased")
    return fill_mask

@st.cache(allow_output_mutation=True)
def load_model_nli():
    """Return a cached NLI (textual-entailment) classifier.

    Uses the canonical task name "text-classification": in transformers,
    "sentiment-analysis" is only a legacy alias for the same pipeline class,
    and is misleading when loading an MNLI entailment model such as
    roberta-large-mnli. Runtime behavior is unchanged.
    """
    return pipeline(task="text-classification", model="roberta-large-mnli")

@st.cache(allow_output_mutation=True)
def load_model_qa():
    """Return a cached extractive question-answering pipeline (RoBERTa fine-tuned on SQuAD2)."""
    checkpoint = "deepset/roberta-base-squad2"
    return pipeline(task="question-answering", model=checkpoint, tokenizer=checkpoint)

# Instantiate the spaCy pipeline once at import time (cached across reruns).
nlp = load_spacy_pipeline()

### App START
st.markdown("""# Rethinking the Event Coding Pipeline with Prompt Entailment
## Author: Anonymized for submission""")

st.markdown("### 1. Actor-target coding (experimental):")
# Cached loader for the flow-diagram image displayed below.
# NOTE(review): st.cache is deprecated in recent Streamlit releases;
# st.cache_resource/st.cache_data are the modern equivalents — confirm the
# pinned Streamlit version before migrating.
@st.cache()
def load_qa_image():
    """Load the actor-target coding flow diagram from disk (relative to the CWD)."""
    return Image.open('pipeline_qa.png')
st.image(load_qa_image(),caption="""Actor-target Coding Flow. First we get the entailed answer candidates through the PR-ENT pipeline. 
Then we construct questions based on these tokens to extract actors and targets, 2 questions per verb. 
Finally, we pass these questions and event description to a pre-trained extractive question answering model and fill a table of [Actor, Action, Target].""")

st.markdown("""
Here we use an extractive question answering model to find the actor and target of an event.
As this is still in experimental phase, there are some limitations:
- The only template possible is `People were [Z].`, this allows us to get a verb to construct the two questions:
- `Who was [Z]?` to find the target.
- `Who [Z] people?` to find the actor.
- `top_k = 10` and `entailment_threshold = 0.5`.
The results of the QA are given along the confidence score of the model in brackets `[xx.x%]`
""")

## Load Models
# All three transformer pipelines are loaded up front; each loader is cached
# by Streamlit, so subsequent reruns reuse the in-memory models.
model_nli = load_model_nli()
model_prompting = load_model_prompting()
model_qa = load_model_qa()



st.markdown("""
### 2. Write an event description:
The first step is to write an event description that will be fed to the pipeline. This can be any text in English.
""")
# Render the input box and mirror the entered text into session state so it
# persists across Streamlit reruns.
text = choose_text_menu('')
st.session_state.text = text


st.markdown("""
### 3. Run actor-target coding:
""")

# Remember which text the most recent QA run actually used, so the displayed
# results can be attributed to the right description even after edits.
if "old_text_qa" not in st.session_state:
    st.session_state.old_text_qa =st.session_state.text

qa_button = st.button("Run actor-target coding")
if qa_button:
    computation_state_qa = st.text("Computation Running.")
    st.session_state.old_text_qa =st.session_state.text
    # Hard-coded template with top_k=10 and entailment_threshold=0.5
    # (the limitations listed in the section-1 text above).
    prompt = "People were {}."
    results = prompt_to_nli(text, prompt, model_prompting, model_nli, nlp, 10, 0.5, True)
    # Presumably x[0][0] is the candidate token and x[1][1] its entailment
    # probability (see helpers.prompt_to_nli) — formats e.g. "injured 87%".
    list_results = [x[0][0] + ' ' + str(int(x[1][1]*100)) + '%' for x in results]
    st.session_state.list_results_prompt_qa = list_results
    list_tokens = [x[0][0] for x in results]
    # Build the [Actor, Action, Target] table by asking the QA model the two
    # questions derived from each entailed verb token.
    who_what_whom = get_who_what_whom_qa(text, list_tokens, model_qa)
    st.session_state.who_what_whom = who_what_whom
    computation_state_qa.text("Computation Done.")

# Results are kept in session state so they stay visible across reruns until
# the next button press overwrites them.
if 'who_what_whom' in st.session_state:
    st.write('**Event Description**: {}'.format(st.session_state.old_text_qa))
    st.write('**Template**: "{}"; **Top K**: {}; **Entailment Threshold**: {}.'.format("People were [Z]",10, 0.5))
    display_nli_pr_results_as_list('', st.session_state.list_results_prompt_qa)
    st.write(pd.DataFrame(st.session_state.who_what_whom))