Spaces:

lunadebruyne
/

EmotioNL

Running

File size: 11,520 Bytes

6c96578
 
 
 
 
 
 
 
dcfd073
6c96578
 
 
c02823d
6c96578
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c02823d
 
 
bc251bf
c02823d
 
 
 
c820969
bc251bf
 
 
 
c820969
 
30512d3
c820969
3afb663
c820969
3afb663
c7ec0f1
3afb663
c7ec0f1
bc251bf
3ed8b4b
62451e9
960fa25
 
 
 
 
 
3ed8b4b
 
 
 
 
c7ec0f1
3ed8b4b
c7ec0f1
c02823d
dcfd073
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c7ec0f1
dcfd073
c7ec0f1
 
 
 
 
 
 
 
 
 
 
 
c02823d
 
 
 
 
 
 
 
5d12990
 
 
 
 
c02823d
011cb7f
c02823d
 
 
b59d991
c02823d
 
5d12990
b59d991
5481b27
b59d991
 
 
373dd05
3b63119
8034ef9
dcfd073
bc251bf
8034ef9
dcfd073
bc251bf
dcfd073
 
bc251bf
8034ef9
dcfd073
bc251bf
8034ef9
bc251bf
abfc6cb
3ed8b4b
3489df6
c7ec0f1
 
bc251bf
31fe7d7
c02823d

import gradio as gr
import torch
import numpy as np

import pandas as pd
from tqdm import tqdm

import matplotlib.pyplot as plt
import datetime

from transformers import AutoTokenizer, AutoConfig, AutoModel, AutoModelForSequenceClassification

"""
description_sentence = "<h3>Demo EmotioNL</h3>\nThis demo allows you to analyse the emotion in a sentence."
description_dataset = "<h3>Demo EmotioNL</h3>\nThis demo allows you to analyse the emotions in a dataset.\nThe data should be in tsv-format with two named columns: the first column (id) should contain the sentence IDs, and the second column (text) should contain the actual texts. Optionally, there is a third column named 'date', which specifies the date associated with the text (e.g., tweet date). This column is necessary when the options 'emotion distribution over time' and 'peaks' are selected."

inference_modelpath = "model/checkpoint-128"

def inference_sentence(text):
    tokenizer = AutoTokenizer.from_pretrained(inference_modelpath)
    model = AutoModelForSequenceClassification.from_pretrained(inference_modelpath)
    for text in tqdm([text]):
        inputs = tokenizer(text, return_tensors="pt")
    with torch.no_grad(): # run model
        logits = model(**inputs).logits
        predicted_class_id = logits.argmax().item()
    output = model.config.id2label[predicted_class_id]
    return output

def frequencies(preds):
	preds_dict = {"neutral": 0, "anger": 0, "fear": 0, "joy": 0, "love": 0, "sadness": 0}
	for pred in preds:
		preds_dict[pred] = preds_dict[pred] + 1
	bars = list(preds_dict.keys())
	height = list(preds_dict.values())

	x_pos = np.arange(len(bars))
	plt.bar(x_pos, height, color=['lightgrey', 'firebrick', 'rebeccapurple', 'orange', 'palevioletred', 'cornflowerblue'])
	plt.xticks(x_pos, bars)
	return plt
    
def inference_dataset(file_object, option_list):
    tokenizer = AutoTokenizer.from_pretrained(inference_modelpath)
    model = AutoModelForSequenceClassification.from_pretrained(inference_modelpath)
    data_path = open(file_object.name, 'r')
    df = pd.read_csv(data_path, delimiter='\t', header=0, names=['id', 'text'])
    ids = df["id"].tolist()
    texts = df["text"].tolist()
    preds = []
    for text in tqdm(texts): # progressbar
        inputs = tokenizer(text, return_tensors="pt")
        with torch.no_grad(): # run model
            logits = model(**inputs).logits
        predicted_class_id = logits.argmax().item()
        prediction = model.config.id2label[predicted_class_id]
        preds.append(prediction)
    predictions_content = list(zip(ids, texts, preds))
    # write predictions to file
    output = "output.txt"
    f = open(output, 'w')
    f.write("id\ttext\tprediction\n")
    for line in predictions_content:
        f.write(str(line[0]) + '\t' + str(line[1]) + '\t' + str(line[2]) + '\n')
    output1 = output
    output2 = output3 = output4 = output5 = "This option was not selected."
    if "emotion frequencies" in option_list:
        output2 = frequencies(preds)
    else:
        output2 = None
    if "emotion distribution over time" in option_list:
        output3 = "This option was selected."
    if "peaks" in option_list:
        output4 = "This option was selected."
    if "topics" in option_list:
        output5 = "This option was selected."
    return [output1, output2, output3, output4, output5]

iface_sentence = gr.Interface(
            fn=inference_sentence,
            description = description_sentence,
            inputs = gr.Textbox(
                    label="Enter a sentence",
                    lines=1),
            outputs="text")

inputs = [gr.File(
            label="Upload a dataset"),
          gr.CheckboxGroup(
            ["emotion frequencies", "emotion distribution over time", "peaks", "topics"],
            label = "Select options")]

outputs = [gr.File(),
           gr.Plot(label="Emotion frequencies"),
           gr.Textbox(label="Emotion distribution over time"),
           gr.Textbox(label="Peaks"),
           gr.Textbox(label="Topics")]

iface_dataset = gr.Interface(
            fn = inference_dataset,
            description = description_dataset,
            inputs=inputs,
            outputs = outputs)

iface = gr.TabbedInterface([iface_sentence, iface_dataset], ["Sentence", "Dataset"])

iface.queue().launch()
"""


def inference_sentence(text):
    """Return a placeholder message that echoes the submitted sentence.

    No model is run here: the result is a fixed header line followed by
    ``text`` on the next line.
    """
    header = "This sentence will be processed:\n"
    return header + text

def file(input_file, input_checks):
    """Write the placeholder predictions file and reveal the first follow-up button.

    Args:
        input_file: The uploaded dataset. It is not read by this placeholder.
        input_checks: The analysis options selected in the checkbox group.

    Returns:
        A 5-tuple of ``gr.update`` values: the predictions file (made
        visible), followed by visibility updates for the frequencies,
        distribution, peaks and topics buttons, in that order. Only the
        button of the earliest selected option is shown; none is shown when
        no option is selected.
    """
    output = "output.txt"
    # The context manager closes the handle even if the write raises.
    with open(output, 'w') as f:
        f.write("The predictions come here.")

    # Options in pipeline order; the first selected one is the next step.
    steps = ("emotion frequencies", "emotion distribution over time", "peaks", "topics")
    next_step = next((step for step in steps if step in input_checks), None)
    return (
        gr.update(value=output, visible=True),
        *(gr.update(visible=(step == next_step)) for step in steps),
    )

def freq(output_file, input_checks):
    """Draw the placeholder emotion-frequency bar chart and reveal the next button.

    Args:
        output_file: The predictions file component value. It is not read
            by this placeholder.
        input_checks: The analysis options selected in the checkbox group.

    Returns:
        A 4-tuple of ``gr.update`` values: the bar chart (made visible),
        followed by visibility updates for the distribution, peaks and
        topics buttons, in that order. Only the button of the earliest
        selected remaining option is shown.
    """
    # Imported locally so this function does not rely on a new file-level import.
    from matplotlib.figure import Figure

    preds_dict = {"neutral": 10, "anger": 8, "fear": 2, "joy": 15, "love": 3, "sadness": 4}
    bars = list(preds_dict)
    height = list(preds_dict.values())
    x_pos = np.arange(len(bars))

    # Draw on a dedicated figure. pyplot's shared current figure keeps
    # whatever earlier calls drew, so repeated clicks (or other plotting
    # handlers) would pile artists onto the same axes.
    fig = Figure()
    ax = fig.subplots()
    ax.bar(x_pos, height, color=['lightgrey', 'firebrick', 'rebeccapurple', 'orange', 'palevioletred', 'cornflowerblue'])
    ax.set_xticks(x_pos)
    ax.set_xticklabels(bars)

    # Remaining options in pipeline order; the first selected one is next.
    steps = ("emotion distribution over time", "peaks", "topics")
    next_step = next((step for step in steps if step in input_checks), None)
    return (
        gr.update(value=fig, visible=True),
        *(gr.update(visible=(step == next_step)) for step in steps),
    )

def dist(output_file, input_checks):
    """Draw the placeholder emotions-over-time line chart and reveal the next button.

    Args:
        output_file: The predictions file component value. It is not read
            by this placeholder.
        input_checks: The analysis options selected in the checkbox group.

    Returns:
        A 3-tuple of ``gr.update`` values: the line chart (made visible),
        followed by visibility updates for the peaks and topics buttons, in
        that order. Only the button of the earliest selected remaining
        option is shown.
    """
    # Imported locally so this function does not rely on a new file-level import.
    from matplotlib.figure import Figure

    # Hard-coded sample data: one count per day for 1-7 November 2021.
    dataframe = pd.DataFrame({'date_of_week': np.array([datetime.datetime(2021, 11, i+1) for i in range(7)]),'classes': [5, 6, 8, 2, 3, 7, 4]})

    # Draw on a dedicated figure. pyplot's shared current figure keeps
    # whatever earlier calls drew, so the line would otherwise be plotted on
    # top of previous charts and accumulate across clicks.
    fig = Figure()
    ax = fig.subplots()
    ax.plot(dataframe["date_of_week"], dataframe["classes"])
    ax.set_title('Emotions over time')

    # Rotate the date tick labels by 30 degrees, right-aligned.
    for label in ax.get_xticklabels():
        label.set_rotation(30)
        label.set_horizontalalignment('right')

    ax.set_xlabel('Date')
    ax.set_ylabel('Number of emotions')

    # Remaining options in pipeline order; the first selected one is next.
    steps = ("peaks", "topics")
    next_step = next((step for step in steps if step in input_checks), None)
    return (
        gr.update(value=fig, visible=True),
        *(gr.update(visible=(step == next_step)) for step in steps),
    )

def peaks(output_file, input_checks):
    """Show the placeholder peaks message and, if requested, the topics button.

    Returns a pair of ``gr.update`` values: the peaks text (made visible)
    and a visibility update for the topics button, which is shown only when
    "topics" is among ``input_checks``.
    """
    wants_topics = "topics" in input_checks
    message = "There  are  peaks on some dates."
    return gr.update(value=message, visible=True), gr.update(visible=wants_topics)

def topics(output_file, input_checks):
    """Show the placeholder topics message.

    This is the last step, so the single ``gr.update`` returned only fills
    and reveals the topics text; neither argument is read.
    """
    return gr.update(value="Some topics  are found.", visible=True)

# Build the two-tab UI. Components are created inside nested context managers
# (Blocks > Tab > Row > Column), so the statement order below is the layout.
with gr.Blocks() as demo:
    # --- Tab 1: single-sentence demo -------------------------------------
    with gr.Tab("Sentence"):
        gr.Markdown("""
        # Demo EmotioNL
        This demo allows you to analyse the emotion in a sentence.
        """)
        with gr.Row():
            with gr.Column():
                # NOTE: `input` rebinds the Python builtin of the same name
                # at module level.
                input = gr.Textbox(
                        label="Enter a sentence",
                        lines=1)
                send_btn = gr.Button("Send")
            output = gr.Textbox()
        # Clicking "Send" passes the textbox value to inference_sentence and
        # shows its return value in `output`.
        send_btn.click(fn=inference_sentence, inputs=input, outputs=output)
    # --- Tab 2: dataset demo, revealed step by step ------------------------
    with gr.Tab("Dataset"):
        gr.Markdown("""
        # Demo EmotioNL
        This demo allows you to analyse the emotions in a dataset. The data should be in tsv-format with two named columns: the first column (id) should contain the sentence IDs, and the second column (text) should contain the actual texts. Optionally, there is a third column named 'date', which specifies the date associated with the text (e.g., tweet date). This column is necessary when the options 'emotion distribution over time' and 'peaks' are selected.
        """)
        with gr.Row():
            # Left column: the upload, the option checkboxes and the trigger.
            with gr.Column():
                input_file = gr.File(
                    label="Upload a dataset")
                input_checks = gr.CheckboxGroup(
                    ["emotion frequencies", "emotion distribution over time", "peaks", "topics"],
                    label = "Select options")
                # `send_btn` is rebound here; the Sentence tab's click handler
                # was already registered on the earlier button object.
                send_btn = gr.Button("Send")
            # Right column: every output and follow-up button starts hidden
            # (visible=False); the handlers return gr.update(...) values that
            # reveal them one step at a time.
            with gr.Column():
                output_file = gr.File(label="Predictions", visible=False)
                next_button_freq = gr.Button("Show emotion frequencies", visible=False)
                
                output_plot = gr.Plot(label="Emotion frequencies", visible=False)
                next_button_dist = gr.Button("Show emotion distribution over time", visible=False)
                
                output_dist = gr.Plot(label="Emotion distribution over time", visible=False)
                next_button_peaks = gr.Button("Show peaks", visible=False)
                
                output_peaks = gr.Textbox(label="Peaks", visible=False)
                next_button_topics = gr.Button("Show topics", visible=False)
                
                output_topics = gr.Textbox(label="Topics", visible=False)
        
        # Event wiring. Each handler receives (output_file, input_checks) -- or
        # (input_file, input_checks) for the first step -- and its returned
        # updates line up positionally with the `outputs` list: first the
        # step's own result component, then the later "next" buttons.
        send_btn.click(fn=file, inputs=[input_file,input_checks], outputs=[output_file,next_button_freq,next_button_dist,next_button_peaks,next_button_topics])
        next_button_freq.click(fn=freq, inputs=[output_file,input_checks], outputs=[output_plot,next_button_dist,next_button_peaks,next_button_topics])
        next_button_dist.click(fn=dist, inputs=[output_file,input_checks], outputs=[output_dist,next_button_peaks,next_button_topics])
        next_button_peaks.click(fn=peaks, inputs=[output_file,input_checks], outputs=[output_peaks,next_button_topics])
        next_button_topics.click(fn=topics, inputs=[output_file,input_checks], outputs=output_topics)

        
# Start the app. There is no `__main__` guard, so this also runs when the
# module is imported.
demo.launch()