Spaces:
Running
Running
File size: 11,520 Bytes
6c96578 dcfd073 6c96578 c02823d 6c96578 c02823d bc251bf c02823d c820969 bc251bf c820969 30512d3 c820969 3afb663 c820969 3afb663 c7ec0f1 3afb663 c7ec0f1 bc251bf 3ed8b4b 62451e9 960fa25 3ed8b4b c7ec0f1 3ed8b4b c7ec0f1 c02823d dcfd073 c7ec0f1 dcfd073 c7ec0f1 c02823d 5d12990 c02823d 011cb7f c02823d b59d991 c02823d 5d12990 b59d991 5481b27 b59d991 373dd05 3b63119 8034ef9 dcfd073 bc251bf 8034ef9 dcfd073 bc251bf dcfd073 bc251bf 8034ef9 dcfd073 bc251bf 8034ef9 bc251bf abfc6cb 3ed8b4b 3489df6 c7ec0f1 bc251bf 31fe7d7 c02823d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 |
import gradio as gr
import torch
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
import datetime
from transformers import AutoTokenizer, AutoConfig, AutoModel, AutoModelForSequenceClassification
"""
description_sentence = "<h3>Demo EmotioNL</h3>\nThis demo allows you to analyse the emotion in a sentence."
description_dataset = "<h3>Demo EmotioNL</h3>\nThis demo allows you to analyse the emotions in a dataset.\nThe data should be in tsv-format with two named columns: the first column (id) should contain the sentence IDs, and the second column (text) should contain the actual texts. Optionally, there is a third column named 'date', which specifies the date associated with the text (e.g., tweet date). This column is necessary when the options 'emotion distribution over time' and 'peaks' are selected."
inference_modelpath = "model/checkpoint-128"
def inference_sentence(text):
tokenizer = AutoTokenizer.from_pretrained(inference_modelpath)
model = AutoModelForSequenceClassification.from_pretrained(inference_modelpath)
for text in tqdm([text]):
inputs = tokenizer(text, return_tensors="pt")
with torch.no_grad(): # run model
logits = model(**inputs).logits
predicted_class_id = logits.argmax().item()
output = model.config.id2label[predicted_class_id]
return output
def frequencies(preds):
preds_dict = {"neutral": 0, "anger": 0, "fear": 0, "joy": 0, "love": 0, "sadness": 0}
for pred in preds:
preds_dict[pred] = preds_dict[pred] + 1
bars = list(preds_dict.keys())
height = list(preds_dict.values())
x_pos = np.arange(len(bars))
plt.bar(x_pos, height, color=['lightgrey', 'firebrick', 'rebeccapurple', 'orange', 'palevioletred', 'cornflowerblue'])
plt.xticks(x_pos, bars)
return plt
def inference_dataset(file_object, option_list):
tokenizer = AutoTokenizer.from_pretrained(inference_modelpath)
model = AutoModelForSequenceClassification.from_pretrained(inference_modelpath)
data_path = open(file_object.name, 'r')
df = pd.read_csv(data_path, delimiter='\t', header=0, names=['id', 'text'])
ids = df["id"].tolist()
texts = df["text"].tolist()
preds = []
for text in tqdm(texts): # progressbar
inputs = tokenizer(text, return_tensors="pt")
with torch.no_grad(): # run model
logits = model(**inputs).logits
predicted_class_id = logits.argmax().item()
prediction = model.config.id2label[predicted_class_id]
preds.append(prediction)
predictions_content = list(zip(ids, texts, preds))
# write predictions to file
output = "output.txt"
f = open(output, 'w')
f.write("id\ttext\tprediction\n")
for line in predictions_content:
f.write(str(line[0]) + '\t' + str(line[1]) + '\t' + str(line[2]) + '\n')
output1 = output
output2 = output3 = output4 = output5 = "This option was not selected."
if "emotion frequencies" in option_list:
output2 = frequencies(preds)
else:
output2 = None
if "emotion distribution over time" in option_list:
output3 = "This option was selected."
if "peaks" in option_list:
output4 = "This option was selected."
if "topics" in option_list:
output5 = "This option was selected."
return [output1, output2, output3, output4, output5]
iface_sentence = gr.Interface(
fn=inference_sentence,
description = description_sentence,
inputs = gr.Textbox(
label="Enter a sentence",
lines=1),
outputs="text")
inputs = [gr.File(
label="Upload a dataset"),
gr.CheckboxGroup(
["emotion frequencies", "emotion distribution over time", "peaks", "topics"],
label = "Select options")]
outputs = [gr.File(),
gr.Plot(label="Emotion frequencies"),
gr.Textbox(label="Emotion distribution over time"),
gr.Textbox(label="Peaks"),
gr.Textbox(label="Topics")]
iface_dataset = gr.Interface(
fn = inference_dataset,
description = description_dataset,
inputs=inputs,
outputs = outputs)
iface = gr.TabbedInterface([iface_sentence, iface_dataset], ["Sentence", "Dataset"])
iface.queue().launch()
"""
def inference_sentence(text):
    """Echo the submitted sentence behind a fixed announcement line.

    No model is invoked here; the input text is only echoed back.

    Args:
        text: The sentence entered by the user.

    Returns:
        The announcement line, a newline, and then ``text``.
    """
    announcement = "This sentence will be processed:\n"
    return announcement + text
def file(input_file, input_checks):
    """Write the predictions file and reveal the button for the first selected step.

    Args:
        input_file: The uploaded dataset. It is currently not read.
        input_checks: Collection of the option labels ticked by the user.

    Returns:
        A 5-tuple of ``gr.update`` objects, in this order: the predictions
        file component (given the output path and made visible), then the
        "emotion frequencies", "emotion distribution over time", "peaks" and
        "topics" buttons. At most one button is made visible: the one for the
        first selected option in that order. The others are hidden.
    """
    output = "output.txt"
    # The context manager closes the handle even if the write raises.
    with open(output, 'w') as f:
        f.write("The predictions come here.")

    # Steps in the order they are chained in the UI; only the earliest
    # selected one gets its button revealed.
    stages = ("emotion frequencies", "emotion distribution over time", "peaks", "topics")
    first_selected = next((stage for stage in stages if stage in input_checks), None)
    buttons = tuple(gr.update(visible=(stage == first_selected)) for stage in stages)
    return (gr.update(value=output, visible=True), *buttons)
def freq(output_file, input_checks):
    """Render the emotion-frequency bar chart and reveal the next step's button.

    The counts are hard-coded sample values; ``output_file`` is not read.

    Args:
        output_file: Value of the predictions file component (unused).
        input_checks: Collection of the option labels ticked by the user.

    Returns:
        A 4-tuple of ``gr.update`` objects, in this order: the plot component
        (given the figure and made visible), then the "emotion distribution
        over time", "peaks" and "topics" buttons. At most one button is made
        visible: the one for the first selected option in that order.
    """
    # Local import: a standalone Figure is not registered with pyplot's
    # global state, so repeated clicks and other plotting callbacks cannot
    # draw onto each other's axes.
    from matplotlib.figure import Figure

    preds_dict = {"neutral": 10, "anger": 8, "fear": 2, "joy": 15, "love": 3, "sadness": 4}
    bars = list(preds_dict)
    height = list(preds_dict.values())
    x_pos = np.arange(len(bars))

    fig = Figure()
    ax = fig.subplots()
    ax.bar(x_pos, height, color=['lightgrey', 'firebrick', 'rebeccapurple', 'orange', 'palevioletred', 'cornflowerblue'])
    ax.set_xticks(x_pos)
    ax.set_xticklabels(bars)

    # Remaining steps in UI order; only the earliest selected one is revealed.
    later_stages = ("emotion distribution over time", "peaks", "topics")
    next_stage = next((stage for stage in later_stages if stage in input_checks), None)
    buttons = tuple(gr.update(visible=(stage == next_stage)) for stage in later_stages)
    return (gr.update(value=fig, visible=True), *buttons)
def dist(output_file, input_checks):
    """Render the emotions-over-time line chart and reveal the next step's button.

    The series is hard-coded sample data for 1-7 November 2021;
    ``output_file`` is not read.

    Args:
        output_file: Value of the predictions file component (unused).
        input_checks: Collection of the option labels ticked by the user.

    Returns:
        A 3-tuple of ``gr.update`` objects, in this order: the plot component
        (given the figure and made visible), then the "peaks" and "topics"
        buttons. At most one button is made visible: the one for the first
        selected option in that order.
    """
    # Local import: a standalone Figure keeps this chart off pyplot's shared
    # "current figure", which other callbacks may already have drawn on.
    from matplotlib.figure import Figure

    dataframe = pd.DataFrame({
        'date_of_week': np.array([datetime.datetime(2021, 11, day) for day in range(1, 8)]),
        'classes': [5, 6, 8, 2, 3, 7, 4],
    })

    fig = Figure()
    ax = fig.subplots()
    # Plot the time series.
    ax.plot(dataframe.date_of_week, dataframe.classes)
    ax.set_title('Emotions over time')
    # Rotate the x-axis tick labels by 30 degrees, anchored on their right end.
    for tick_label in ax.get_xticklabels():
        tick_label.set_rotation(30)
        tick_label.set_ha('right')
    ax.set_xlabel('Date')
    ax.set_ylabel('Number of emotions')

    # Remaining steps in UI order; only the earliest selected one is revealed.
    later_stages = ("peaks", "topics")
    next_stage = next((stage for stage in later_stages if stage in input_checks), None)
    buttons = tuple(gr.update(visible=(stage == next_stage)) for stage in later_stages)
    return (gr.update(value=fig, visible=True), *buttons)
def peaks(output_file, input_checks):
    """Show the peaks message and reveal the topics button if topics was selected.

    The message is a fixed string; ``output_file`` is not read.

    Args:
        output_file: Value of the predictions file component (unused).
        input_checks: Collection of the option labels ticked by the user.

    Returns:
        A pair of ``gr.update`` objects: the peaks textbox (given the message
        and made visible), and the "topics" button, visible only when
        "topics" is among ``input_checks``.
    """
    message = "There are peaks on some dates."
    wants_topics = "topics" in input_checks
    return gr.update(value=message, visible=True), gr.update(visible=wants_topics)
def topics(output_file, input_checks):
    """Show the topics message; this is the last step, so no button is revealed.

    Neither argument is read; the message is a fixed string.

    Args:
        output_file: Value of the predictions file component (unused).
        input_checks: Collection of the option labels ticked by the user (unused).

    Returns:
        A single ``gr.update`` that sets the topics textbox to the message
        and makes it visible.
    """
    return gr.update(value="Some topics are found.", visible=True)
# Build the two-tab demo UI ("Sentence" and "Dataset") and start the server.
with gr.Blocks() as demo:
    with gr.Tab("Sentence"):
        gr.Markdown("""
# Demo EmotioNL
This demo allows you to analyse the emotion in a sentence.
""")
        with gr.Row():
            with gr.Column():
                # Named sentence_input rather than `input` so the builtin
                # input() is not shadowed at module level.
                sentence_input = gr.Textbox(
                    label="Enter a sentence",
                    lines=1)
                send_btn = gr.Button("Send")
            # NOTE(review): assumed to sit in the row beside the input
            # column; confirm against the intended layout.
            output = gr.Textbox()
        send_btn.click(fn=inference_sentence, inputs=sentence_input, outputs=output)
    with gr.Tab("Dataset"):
        gr.Markdown("""
# Demo EmotioNL
This demo allows you to analyse the emotions in a dataset. The data should be in tsv-format with two named columns: the first column (id) should contain the sentence IDs, and the second column (text) should contain the actual texts. Optionally, there is a third column named 'date', which specifies the date associated with the text (e.g., tweet date). This column is necessary when the options 'emotion distribution over time' and 'peaks' are selected.
""")
        with gr.Row():
            with gr.Column():
                input_file = gr.File(
                    label="Upload a dataset")
                input_checks = gr.CheckboxGroup(
                    ["emotion frequencies", "emotion distribution over time", "peaks", "topics"],
                    label="Select options")
                send_btn = gr.Button("Send")
            with gr.Column():
                # Every result component and every "next" button starts
                # hidden; the handlers reveal them one step at a time.
                output_file = gr.File(label="Predictions", visible=False)
                next_button_freq = gr.Button("Show emotion frequencies", visible=False)
                output_plot = gr.Plot(label="Emotion frequencies", visible=False)
                next_button_dist = gr.Button("Show emotion distribution over time", visible=False)
                output_dist = gr.Plot(label="Emotion distribution over time", visible=False)
                next_button_peaks = gr.Button("Show peaks", visible=False)
                output_peaks = gr.Textbox(label="Peaks", visible=False)
                next_button_topics = gr.Button("Show topics", visible=False)
                output_topics = gr.Textbox(label="Topics", visible=False)
        # Each handler returns one gr.update per component listed in
        # `outputs`, in the same order: first its own result component, then
        # the buttons of the steps that can still follow.
        send_btn.click(fn=file, inputs=[input_file, input_checks], outputs=[output_file, next_button_freq, next_button_dist, next_button_peaks, next_button_topics])
        next_button_freq.click(fn=freq, inputs=[output_file, input_checks], outputs=[output_plot, next_button_dist, next_button_peaks, next_button_topics])
        next_button_dist.click(fn=dist, inputs=[output_file, input_checks], outputs=[output_dist, next_button_peaks, next_button_topics])
        next_button_peaks.click(fn=peaks, inputs=[output_file, input_checks], outputs=[output_peaks, next_button_topics])
        next_button_topics.click(fn=topics, inputs=[output_file, input_checks], outputs=output_topics)
demo.launch()
|