File size: 11,520 Bytes
6c96578
 
 
 
 
 
 
 
dcfd073
6c96578
 
 
c02823d
6c96578
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c02823d
 
 
bc251bf
c02823d
 
 
 
c820969
bc251bf
 
 
 
c820969
 
30512d3
c820969
3afb663
c820969
3afb663
c7ec0f1
3afb663
c7ec0f1
bc251bf
3ed8b4b
62451e9
960fa25
 
 
 
 
 
3ed8b4b
 
 
 
 
c7ec0f1
3ed8b4b
c7ec0f1
c02823d
dcfd073
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c7ec0f1
dcfd073
c7ec0f1
 
 
 
 
 
 
 
 
 
 
 
c02823d
 
 
 
 
 
 
 
5d12990
 
 
 
 
c02823d
011cb7f
c02823d
 
 
b59d991
c02823d
 
5d12990
b59d991
5481b27
b59d991
 
 
373dd05
3b63119
8034ef9
dcfd073
bc251bf
8034ef9
dcfd073
bc251bf
dcfd073
 
bc251bf
8034ef9
dcfd073
bc251bf
8034ef9
bc251bf
abfc6cb
3ed8b4b
3489df6
c7ec0f1
 
bc251bf
31fe7d7
c02823d
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
import gradio as gr
import torch
import numpy as np

import pandas as pd
from tqdm import tqdm

import matplotlib.pyplot as plt
import datetime

from transformers import AutoTokenizer, AutoConfig, AutoModel, AutoModelForSequenceClassification

# NOTE: The triple-quoted string below holds an earlier, model-backed version
# of this app (two gr.Interface tabs combined with gr.TabbedInterface). It is a
# bare string expression, so none of it is executed; the Blocks-based demo
# defined after it is what actually runs.
"""
description_sentence = "<h3>Demo EmotioNL</h3>\nThis demo allows you to analyse the emotion in a sentence."
description_dataset = "<h3>Demo EmotioNL</h3>\nThis demo allows you to analyse the emotions in a dataset.\nThe data should be in tsv-format with two named columns: the first column (id) should contain the sentence IDs, and the second column (text) should contain the actual texts. Optionally, there is a third column named 'date', which specifies the date associated with the text (e.g., tweet date). This column is necessary when the options 'emotion distribution over time' and 'peaks' are selected."

inference_modelpath = "model/checkpoint-128"

def inference_sentence(text):
    tokenizer = AutoTokenizer.from_pretrained(inference_modelpath)
    model = AutoModelForSequenceClassification.from_pretrained(inference_modelpath)
    for text in tqdm([text]):
        inputs = tokenizer(text, return_tensors="pt")
    with torch.no_grad(): # run model
        logits = model(**inputs).logits
        predicted_class_id = logits.argmax().item()
    output = model.config.id2label[predicted_class_id]
    return output

def frequencies(preds):
	preds_dict = {"neutral": 0, "anger": 0, "fear": 0, "joy": 0, "love": 0, "sadness": 0}
	for pred in preds:
		preds_dict[pred] = preds_dict[pred] + 1
	bars = list(preds_dict.keys())
	height = list(preds_dict.values())

	x_pos = np.arange(len(bars))
	plt.bar(x_pos, height, color=['lightgrey', 'firebrick', 'rebeccapurple', 'orange', 'palevioletred', 'cornflowerblue'])
	plt.xticks(x_pos, bars)
	return plt
    
def inference_dataset(file_object, option_list):
    tokenizer = AutoTokenizer.from_pretrained(inference_modelpath)
    model = AutoModelForSequenceClassification.from_pretrained(inference_modelpath)
    data_path = open(file_object.name, 'r')
    df = pd.read_csv(data_path, delimiter='\t', header=0, names=['id', 'text'])
    ids = df["id"].tolist()
    texts = df["text"].tolist()
    preds = []
    for text in tqdm(texts): # progressbar
        inputs = tokenizer(text, return_tensors="pt")
        with torch.no_grad(): # run model
            logits = model(**inputs).logits
        predicted_class_id = logits.argmax().item()
        prediction = model.config.id2label[predicted_class_id]
        preds.append(prediction)
    predictions_content = list(zip(ids, texts, preds))
    # write predictions to file
    output = "output.txt"
    f = open(output, 'w')
    f.write("id\ttext\tprediction\n")
    for line in predictions_content:
        f.write(str(line[0]) + '\t' + str(line[1]) + '\t' + str(line[2]) + '\n')
    output1 = output
    output2 = output3 = output4 = output5 = "This option was not selected."
    if "emotion frequencies" in option_list:
        output2 = frequencies(preds)
    else:
        output2 = None
    if "emotion distribution over time" in option_list:
        output3 = "This option was selected."
    if "peaks" in option_list:
        output4 = "This option was selected."
    if "topics" in option_list:
        output5 = "This option was selected."
    return [output1, output2, output3, output4, output5]

iface_sentence = gr.Interface(
            fn=inference_sentence,
            description = description_sentence,
            inputs = gr.Textbox(
                    label="Enter a sentence",
                    lines=1),
            outputs="text")

inputs = [gr.File(
            label="Upload a dataset"),
          gr.CheckboxGroup(
            ["emotion frequencies", "emotion distribution over time", "peaks", "topics"],
            label = "Select options")]

outputs = [gr.File(),
           gr.Plot(label="Emotion frequencies"),
           gr.Textbox(label="Emotion distribution over time"),
           gr.Textbox(label="Peaks"),
           gr.Textbox(label="Topics")]

iface_dataset = gr.Interface(
            fn = inference_dataset,
            description = description_dataset,
            inputs=inputs,
            outputs = outputs)

iface = gr.TabbedInterface([iface_sentence, iface_dataset], ["Sentence", "Dataset"])

iface.queue().launch()
"""


def inference_sentence(text: str) -> str:
    """Return the placeholder response for a single sentence.

    No model is invoked here; the sentence is simply appended to a fixed
    notice line.

    Args:
        text: The sentence entered by the user.

    Returns:
        The notice line, a newline, and then ``text``.
    """
    return "This sentence will be processed:\n" + text

def file(input_file, input_checks):
    """Write the placeholder predictions file and reveal the first follow-up button.

    Args:
        input_file: The uploaded dataset. It is not read by this placeholder
            implementation.
        input_checks: The option names ticked in the "Select options"
            checkbox group.

    Returns:
        A 5-tuple of ``gr.update`` values, in this order: the predictions
        file (always shown), then one visibility update per follow-up button
        (frequencies, distribution, peaks, topics). Only the button of the
        first selected option, in that order, is made visible; if nothing is
        selected, all four stay hidden.
    """
    output = "output.txt"
    # The context manager guarantees the handle is closed even if the write fails.
    with open(output, 'w') as f:
        f.write("The predictions come here.")

    # Follow-up stages in the order their buttons are chained in the UI.
    stages = (
        "emotion frequencies",
        "emotion distribution over time",
        "peaks",
        "topics",
    )
    first_selected = next((stage for stage in stages if stage in input_checks), None)
    return (
        gr.update(value=output, visible=True),
        *(gr.update(visible=(stage == first_selected)) for stage in stages),
    )

def freq(output_file, input_checks):
    """Draw the (placeholder) emotion-frequency bar chart and reveal the next button.

    The chart is drawn on a figure created for this call. Drawing through
    pyplot's implicit "current figure" would stack bars from repeated clicks
    and let other plotting callbacks draw onto the same axes.

    Args:
        output_file: The predictions file component value. It is not read by
            this placeholder implementation.
        input_checks: The option names ticked in the "Select options"
            checkbox group.

    Returns:
        A 4-tuple of ``gr.update`` values, in this order: the frequency plot
        (always shown), then one visibility update per remaining follow-up
        button (distribution, peaks, topics). Only the button of the first
        selected remaining option is made visible.
    """
    # A bare Figure is not registered with pyplot, so it holds no global state
    # and can be garbage-collected once Gradio has rendered it.
    from matplotlib.figure import Figure

    preds_dict = {"neutral": 10, "anger": 8, "fear": 2, "joy": 15, "love": 3, "sadness": 4}
    bars = list(preds_dict)
    height = list(preds_dict.values())
    x_pos = np.arange(len(bars))

    fig = Figure()
    ax = fig.subplots()
    ax.bar(x_pos, height, color=['lightgrey', 'firebrick', 'rebeccapurple', 'orange', 'palevioletred', 'cornflowerblue'])
    ax.set_xticks(x_pos)
    ax.set_xticklabels(bars)

    # Remaining stages in the order their buttons are chained in the UI.
    stages = ("emotion distribution over time", "peaks", "topics")
    first_selected = next((stage for stage in stages if stage in input_checks), None)
    return (
        gr.update(value=fig, visible=True),
        *(gr.update(visible=(stage == first_selected)) for stage in stages),
    )

def dist(output_file, input_checks):
    """Draw the (placeholder) emotions-over-time line chart and reveal the next button.

    The chart is drawn on a figure created for this call. Drawing through
    pyplot's implicit "current figure" would put the line on top of whatever
    was plotted last (for example the frequency bar chart).

    Args:
        output_file: The predictions file component value. It is not read by
            this placeholder implementation.
        input_checks: The option names ticked in the "Select options"
            checkbox group.

    Returns:
        A 3-tuple of ``gr.update`` values, in this order: the distribution
        plot (always shown), then one visibility update per remaining
        follow-up button (peaks, topics). Only the button of the first
        selected remaining option is made visible.
    """
    # A bare Figure is not registered with pyplot, so it holds no global state
    # and can be garbage-collected once Gradio has rendered it.
    from matplotlib.figure import Figure

    dataframe = pd.DataFrame({'date_of_week': np.array([datetime.datetime(2021, 11, i+1) for i in range(7)]),'classes': [5, 6, 8, 2, 3, 7, 4]})

    fig = Figure()
    ax = fig.subplots()
    # Plot the time series of the dataframe.
    ax.plot(dataframe.date_of_week, dataframe.classes)
    ax.set_title('Emotions over time')

    # Rotate the x-axis tick labels by 30 degrees, anchored on their right end.
    for label in ax.get_xticklabels():
        label.set_rotation(30)
        label.set_horizontalalignment('right')

    ax.set_xlabel('Date')
    ax.set_ylabel('Number of emotions')

    # Remaining stages in the order their buttons are chained in the UI.
    stages = ("peaks", "topics")
    first_selected = next((stage for stage in stages if stage in input_checks), None)
    return (
        gr.update(value=fig, visible=True),
        *(gr.update(visible=(stage == first_selected)) for stage in stages),
    )

def peaks(output_file, input_checks):
    """Show the (placeholder) peaks text and, if requested, the topics button.

    Args:
        output_file: The predictions file component value. It is not read by
            this placeholder implementation.
        input_checks: The option names ticked in the "Select options"
            checkbox group.

    Returns:
        A 2-tuple of ``gr.update`` values: the peaks textbox (always shown)
        and the visibility of the "topics" follow-up button.
    """
    message = "There  are  peaks on some dates."
    # The topics button is the only stage that can follow this one.
    topics_selected = "topics" in input_checks
    return gr.update(value=message, visible=True), gr.update(visible=topics_selected)

def topics(output_file, input_checks):
    """Show the (placeholder) topics text.

    This is the last stage of the chain, so no follow-up button is revealed.

    Args:
        output_file: The predictions file component value. It is not read by
            this placeholder implementation.
        input_checks: The selected option names. They are not read here.

    Returns:
        A single ``gr.update`` that fills in and shows the topics textbox.
    """
    return gr.update(value="Some topics  are found.", visible=True)

# UI definition: two tabs, one for a single sentence and one for a dataset.
# Component names are tab-specific so that the builtin ``input`` is not
# shadowed at module level and no name is silently rebound by the second tab.
with gr.Blocks() as demo:
    with gr.Tab("Sentence"):
        gr.Markdown("""
        # Demo EmotioNL
        This demo allows you to analyse the emotion in a sentence.
        """)
        with gr.Row():
            with gr.Column():
                sentence_input = gr.Textbox(
                        label="Enter a sentence",
                        lines=1)
                sentence_send_btn = gr.Button("Send")
            sentence_output = gr.Textbox()
        sentence_send_btn.click(fn=inference_sentence, inputs=sentence_input, outputs=sentence_output)
    with gr.Tab("Dataset"):
        gr.Markdown("""
        # Demo EmotioNL
        This demo allows you to analyse the emotions in a dataset. The data should be in tsv-format with two named columns: the first column (id) should contain the sentence IDs, and the second column (text) should contain the actual texts. Optionally, there is a third column named 'date', which specifies the date associated with the text (e.g., tweet date). This column is necessary when the options 'emotion distribution over time' and 'peaks' are selected.
        """)
        with gr.Row():
            with gr.Column():
                input_file = gr.File(
                    label="Upload a dataset")
                input_checks = gr.CheckboxGroup(
                    ["emotion frequencies", "emotion distribution over time", "peaks", "topics"],
                    label = "Select options")
                dataset_send_btn = gr.Button("Send")
            with gr.Column():
                # Every result component starts hidden. Each callback reveals
                # its own output plus the button for the next selected stage.
                output_file = gr.File(label="Predictions", visible=False)
                next_button_freq = gr.Button("Show emotion frequencies", visible=False)

                output_plot = gr.Plot(label="Emotion frequencies", visible=False)
                next_button_dist = gr.Button("Show emotion distribution over time", visible=False)

                output_dist = gr.Plot(label="Emotion distribution over time", visible=False)
                next_button_peaks = gr.Button("Show peaks", visible=False)

                output_peaks = gr.Textbox(label="Peaks", visible=False)
                next_button_topics = gr.Button("Show topics", visible=False)

                output_topics = gr.Textbox(label="Topics", visible=False)

        # Chain of stages: each click fills one output and decides which
        # later "next" button (if any) becomes visible.
        dataset_send_btn.click(fn=file, inputs=[input_file,input_checks], outputs=[output_file,next_button_freq,next_button_dist,next_button_peaks,next_button_topics])
        next_button_freq.click(fn=freq, inputs=[output_file,input_checks], outputs=[output_plot,next_button_dist,next_button_peaks,next_button_topics])
        next_button_dist.click(fn=dist, inputs=[output_file,input_checks], outputs=[output_dist,next_button_peaks,next_button_topics])
        next_button_peaks.click(fn=peaks, inputs=[output_file,input_checks], outputs=[output_peaks,next_button_topics])
        next_button_topics.click(fn=topics, inputs=[output_file,input_checks], outputs=output_topics)


demo.launch()