import streamlit as st
import plotly.express as px
import torch
from torch import nn
from transformers import AutoTokenizer, AutoModelForSequenceClassification
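# Streamlit reruns this script from top to bottom on every widget interaction,
# so the chosen model is reloaded and the text re-classified each time.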
option = st.selectbox("Select a toxicity analysis model:", ("RoBERTa", "DistilBERT", "XLM-RoBERTa"))
defaultTxt = "I hate you cancerous insects so much"
txt = st.text_area("Text to analyze", defaultTxt)
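# The button's return value is not checked; clicking it simply triggers a rerun.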
st.button("Submit Text")
# Load the tokenizer and model weights for the selected option, defaulting to RoBERTa.
# The Spaces runtime predates Python 3.10, so an if/elif chain stands in for a match statement.
if option == "RoBERTa":
    tokenizerPath = "s-nlp/roberta_toxicity_classifier"
    modelPath = "s-nlp/roberta_toxicity_classifier"
    # This checkpoint's label order is [neutral, toxic].
    neutralIndex = 0
    toxicIndex = 1
elif option == "DistilBERT":
    tokenizerPath = "citizenlab/distilbert-base-multilingual-cased-toxicity"
    modelPath = "citizenlab/distilbert-base-multilingual-cased-toxicity"
    # This checkpoint's label order is [toxic, neutral].
    neutralIndex = 1
    toxicIndex = 0
elif option == "XLM-RoBERTa":
    tokenizerPath = "unitary/multilingual-toxic-xlm-roberta"
    modelPath = "unitary/multilingual-toxic-xlm-roberta"
    # Same index order as above; single-logit outputs are handled by the padding below.
    neutralIndex = 1
    toxicIndex = 0
else:
    # Fall back to RoBERTa if the option is unrecognized.
    tokenizerPath = "s-nlp/roberta_toxicity_classifier"
    modelPath = "s-nlp/roberta_toxicity_classifier"
    neutralIndex = 0
    toxicIndex = 1
tokenizer = AutoTokenizer.from_pretrained(tokenizerPath)
model = AutoModelForSequenceClassification.from_pretrained(modelPath)
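# from_pretrained downloads the files from the Hugging Face Hub on first use and caches them locally.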
# Run the encoded text through the model to get the classification logits.
# RoBERTa label order: [0] = neutral, [1] = toxic.
encoding = tokenizer.encode(txt, return_tensors='pt')
result = model(encoding)
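# result is a SequenceClassifierOutput; result.logits has shape [1, num_labels].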
# Convert the logits into probabilities.
logits = result.logits
if logits.size(dim=1) < 2:
    # Some checkpoints emit a single "toxic" logit; pad a zero so the
    # [toxic, neutral] indexing below still works. The padded value is a
    # placeholder, not a calibrated neutral score.
    logits = nn.functional.pad(logits, (0, 1), "constant", 0)
prediction = nn.functional.softmax(logits, dim=-1)
neutralProb = prediction[0][neutralIndex].item()
toxicProb = prediction[0][toxicIndex].item()
# Expected RoBERTa output for the default text:
# Neutral: 0.0052
# Toxic: 0.9948
st.write("Classification Probabilities")
st.write(f"{neutralProb:.4f} - NEUTRAL")
st.write(f"{toxicProb:.4f} - TOXIC")