Spaces:
Runtime error
Runtime error
File size: 3,620 Bytes
fb22d07 eb56f89 fb22d07 459ba42 bd93b63 459ba42 fb22d07 459ba42 fb22d07 e043bee 459ba42 e043bee 459ba42 e043bee 459ba42 e043bee 459ba42 e043bee d75c440 87f6beb 459ba42 87f6beb e043bee 459ba42 e043bee fb22d07 459ba42 fb22d07 459ba42 170f9bb fb22d07 87f6beb 459ba42 fb22d07 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 |
import streamlit as st # Web App
from main import classify
import pandas as pd
# demo_phrases = """ Here are some examples:
# this is a phrase
# is it neutral
# nothing else to say
# man I'm so damn angry
# sarcasm lol
# I love this product
# """
#demo_phrases = (
# pd.read_csv("./train.csv")["comment_text"].head(6).astype(str).str.cat(sep="\n")
#)
# Default content for the text area: the first three comments labelled toxic
# followed by the first three labelled non-toxic, one comment per line.
df = pd.read_csv("./train.csv")
toxic = df.loc[df["toxic"] == 1, "comment_text"].head(3)
normal = df.loc[df["toxic"] == 0, "comment_text"].head(3)
demo_phrases = "\n".join(pd.concat([toxic, normal]).astype(str))
# Page header.
st.title("Sentiment Analysis")
st.markdown("## A selection of popular sentiment analysis models - hosted on 🤗 Spaces")

# Model picker. The last entry is handled by the local `infer` path further
# down in this script; the others are passed to `classify`.
model_name = st.selectbox(
    "Select a pre-trained model",
    [
        "finiteautomata/bertweet-base-sentiment-analysis",
        "ahmedrachid/FinancialBERT-Sentiment-Analysis",
        "finiteautomata/beto-sentiment-analysis",
        "NativeVex/custom-fine-tuned",
    ],
)

# Free-text input, one sentence per line, pre-filled with the demo phrases.
input_sentences = st.text_area("Sentences", value=demo_phrases, height=200)

# Split into one input per line. Blank or whitespace-only lines (for example a
# trailing newline or a spacer line between sentences) are dropped so they are
# not sent to the model as empty inputs. Kept lines are left unmodified.
data = [line for line in input_sentences.split("\n") if line.strip()]
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# Directory holding the fine-tuned checkpoint; the tokenizer and the model
# weights are both loaded from this same location.
model_path = "bin/model4"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path)
from typing import List, Dict
import torch
import numpy as np
import pandas as pd
def infer(text: str) -> Dict[str, str]:
    """Score one text with the locally loaded multi-label model.

    Args:
        text (str): text to infer

    Returns:
        Dict[str, str]: maps each label name to the string form of a
        ``(flagged, probability)`` tuple, where ``probability`` is the
        sigmoid of the label's logit and ``flagged`` is True when that
        probability is at least 0.5.

    Raises:
        ValueError: if the model does not emit exactly one score per label.
    """
    label_names = [
        "toxic",
        "severe_toxic",
        "obscene",
        "threat",
        "insult",
        "identity_hate",
    ]

    # truncation=True keeps over-long inputs within the tokenizer's configured
    # maximum length instead of passing an oversized sequence to the model.
    encoding = tokenizer(text, return_tensors="pt", truncation=True)
    encoding = {k: v.to(model.device) for k, v in encoding.items()}

    # Inference only: no gradients are needed, so skip autograd bookkeeping.
    with torch.no_grad():
        logits = model(**encoding).logits

    # Independent per-label probabilities (sigmoid, not softmax).
    probs = torch.sigmoid(logits.squeeze().cpu())
    scores = probs.tolist()
    flagged = (probs >= 0.5).tolist()

    if len(scores) != len(label_names):
        raise ValueError(
            f"Length mismatch: expected {len(label_names)} label scores, "
            f"got {len(scores)}"
        )

    # Values are stringified tuples so the result renders directly as table
    # cells when collected into a DataFrame.
    return {
        name: str(pair)
        for name, pair in zip(label_names, zip(flagged, scores))
    }
def wrapper(*args, **kwargs):
    """Dispatch a classification request to the right backend.

    The first positional argument is the model name. When it names the
    custom fine-tuned model, the second positional argument is passed to
    `infer` as the text; otherwise every argument is forwarded unchanged to
    `classify`.
    """
    selected_model = args[0]
    if selected_model == "NativeVex/custom-fine-tuned":
        return infer(text=args[1])
    return classify(*args, **kwargs)
if st.button("Classify"):
    selected_model = model_name.strip()
    if selected_model == "NativeVex/custom-fine-tuned":
        # Custom model: score every input first, then show all results in a
        # single table with one row per input.
        st.write(
            "To render the dataframe, all inputs must be sequentially"
            " processed before displaying. Please allow a few minutes for longer"
            " inputs."
        )
        per_input_scores = [infer(text=sentence) for sentence in data]
        results_table = pd.DataFrame(per_input_scores)
        st.dataframe(data=results_table)
    else:
        # Hosted models: classify each input and write its top result as soon
        # as it is available.
        st.write("Please allow a few minutes for the model to run/download")
        for position, sentence in enumerate(data):
            top_result = classify(selected_model, sentence)[0]
            sentiment = top_result["label"]
            confidence = top_result["score"]
            st.write(
                f"{position}. {sentence} :: Classification - {sentiment} with confidence {confidence}"
            )
# Footer link rendered as markdown.
# NOTE(review): the link text and URL point at "Amrrs/image-to-text-app",
# which does not match this page's "Sentiment Analysis" title — presumably a
# leftover from a template; confirm the intended Space URL.
st.markdown(
    "Link to the app - [image-to-text-app on 🤗 Spaces](https://huggingface.co/spaces/Amrrs/image-to-text-app)"
)
|