import streamlit as st  # Web App
from main import classify  # classification helper for the hosted Hub models (local module)

from typing import Dict

import numpy as np
import pandas as pd
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# demo_phrases = """ Here are some examples:
# this is a phrase
# is it neutral
# nothing else to say
# man I'm so damn angry
# sarcasm lol
# I love this product
# """
#demo_phrases = (
#    pd.read_csv("./train.csv")["comment_text"].head(6).astype(str).str.cat(sep="\n")
#)

# Build the demo text from the training data: three toxic and three non-toxic
# comments, one per line. ./train.csv is assumed to be Jigsaw-style toxic
# comment data with a "comment_text" column and binary label columns.
df = pd.read_csv("./train.csv")
toxic = df[df['toxic'] == 1]['comment_text'].head(3)
normal = df[df['toxic'] == 0]['comment_text'].head(3)
demo_phrases = pd.concat([toxic, normal]).astype(str).str.cat(sep="\n")
# title
st.title("Sentiment Analysis")

# subtitle
st.markdown("## A selection of popular sentiment analysis models -  hosted on 🤗 Spaces")

model_name = st.selectbox(
    "Select a pre-trained model",
    [
        "finiteautomata/bertweet-base-sentiment-analysis",
        "ahmedrachid/FinancialBERT-Sentiment-Analysis",
        "finiteautomata/beto-sentiment-analysis",
        "NativeVex/custom-fine-tuned",
    ],
)
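# The first three entries are models pulled from the Hugging Face Hub and
# handled by classify(); "NativeVex/custom-fine-tuned" is served by the local
# checkpoint loaded below.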

input_sentences = st.text_area("Sentences", value=demo_phrases, height=200)

data = input_sentences.split("\n")

# Load the locally saved fine-tuned checkpoint ("bin/model4" is assumed to
# contain config, weights, and tokenizer files with six output labels).
model_path = "bin/model4"
model = AutoModelForSequenceClassification.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)


def infer(text: str) -> Dict[str, str]:
    """Classify a single piece of text with the custom fine-tuned model.

    Args:
        text (str): text to classify

    Returns:
        Dict[str, str]: mapping from each toxicity label to a stringified
        "(prediction, probability)" pair
    """
    encoding = tokenizer(text, return_tensors="pt")
    encoding = {k: v.to(model.device) for k, v in encoding.items()}
    with torch.no_grad():
        outputs = model(**encoding)
    # Multi-label head: apply an element-wise sigmoid, then threshold each label at 0.5.
    probs = torch.sigmoid(outputs.logits.squeeze().cpu())
    predictions = np.zeros(probs.shape)
    predictions[np.where(probs >= 0.5)] = 1
    predictions = pd.Series(predictions == 1)
    scores = pd.Series(zip(predictions.tolist(), probs.tolist())).apply(str)
    scores.index = [
        "toxic",
        "severe_toxic",
        "obscene",
        "threat",
        "insult",
        "identity_hate",
    ]
    return scores.to_dict()


def wrapper(*args, **kwargs):
    """Dispatch a request to the appropriate model.

    Acts as a switchboard: routes to infer() when the custom fine-tuned model
    is selected and to classify() otherwise. Expects the model name as the
    first positional argument and the text as the second.
    """
    if args[0] != "NativeVex/custom-fine-tuned":
        return classify(*args, **kwargs)
    return infer(text=args[1])
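# Illustrative output shapes only (values are hypothetical, not real results):
# classify(...)[0] is expected to be a pipeline-style dict such as
# {"label": "POS", "score": 0.98}, while infer(...) returns one stringified
# "(prediction, probability)" pair per label, e.g.
# {"toxic": "(True, 0.93)", "obscene": "(False, 0.07)", ...}.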


if st.button("Classify"):
    if not model_name.strip() == "NativeVex/custom-fine-tuned":
        st.write("Please allow a few minutes for the model to run/download")
        for i in range(len(data)):
            # j = wrapper(model_name.strip(), data[i])[0]
            j = classify(model_name.strip(), data[i])[0]
            sentiment = j["label"]
            confidence = j["score"]
            st.write(
                f"{i}. {data[i]} :: Classification - {sentiment} with confidence {confidence}"
            )
    else:
        st.write(
            "To render the dataframe, all inputs must be sequentially"
            " processed before displaying. Please allow a few minutes for longer"
            " inputs."
        )
        internal_list = [infer(text=i) for i in data]
        j = pd.DataFrame(internal_list)
        st.dataframe(data=j)


st.markdown(
    "Link to the app - [image-to-text-app on 🤗 Spaces](https://huggingface.co/spaces/Amrrs/image-to-text-app)"
)