import streamlit as st
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
def load_finetune_model(model_name):
    """Fetch a fine-tuned tokenizer/model pair from the Hugging Face Hub.

    Args:
        model_name: Hub model id (e.g. "Olivernyu/finetuned_bert_base_uncased").

    Returns:
        A ``(tokenizer, model)`` tuple ready for sequence classification.
    """
    return (
        AutoTokenizer.from_pretrained(model_name),
        AutoModelForSequenceClassification.from_pretrained(model_name),
    )
def load_model(model_name):
    """Build a ready-to-use "sentiment-analysis" pipeline for *model_name*.

    Args:
        model_name: Hub model id to download the tokenizer and weights from.

    Returns:
        A ``transformers`` pipeline that maps text to label/score dicts.
    """
    tok = AutoTokenizer.from_pretrained(model_name)
    mdl = AutoModelForSequenceClassification.from_pretrained(model_name)
    return pipeline("sentiment-analysis", tokenizer=tok, model=mdl)
# Streamlit app: multi-label toxicity / sentiment demo.
st.title("Multi-label Toxicity Detection App")
st.write("Enter a text and select the fine-tuned model to get the toxicity analysis.")

# Input text
default_text = "I will kill you if you do not give me my pop tarts."
text = st.text_input("Enter your text:", value=default_text)

# Maps the toxicity model's generic labels to human-readable category names.
category = {
    'LABEL_0': 'toxic',
    'LABEL_1': 'severe_toxic',
    'LABEL_2': 'obscene',
    'LABEL_3': 'threat',
    'LABEL_4': 'insult',
    'LABEL_5': 'identity_hate',
}

# Model selection: keys are Hub model ids; values hold UI metadata.
model_options = {
    "Olivernyu/finetuned_bert_base_uncased": {
        "description": "This model detects different types of toxicity like threats, obscenity, insults, and identity-based hate in text.",
    },
    "distilbert-base-uncased-finetuned-sst-2-english": {
        "labels": ["NEGATIVE", "POSITIVE"],
        "description": "This model classifies text into positive or negative sentiment. It is based on DistilBERT and fine-tuned on the Stanford Sentiment Treebank (SST-2) dataset.",
    },
    "textattack/bert-base-uncased-SST-2": {
        "labels": ["LABEL_0", "LABEL_1"],
        "description": "This model classifies text into positive(LABEL_1) or negative(LABEL_0) sentiment. It is based on BERT and fine-tuned on the Stanford Sentiment Treebank (SST-2) dataset.",
    },
    "cardiffnlp/twitter-roberta-base-sentiment": {
        "labels": ["LABEL_0", "LABEL_1", "LABEL_2"],
        "description": "This model classifies tweets into negative (LABEL_0), neutral(LABEL_1), or positive(LABEL_2) sentiment. It is based on RoBERTa and fine-tuned on a large dataset of tweets.",
    },
}

selected_model = st.selectbox("Choose a fine-tuned model:", model_options)

st.write("### Model Information")
st.write(f"**Description:** {model_options[selected_model]['description']}")

# Load the model and perform toxicity analysis
if st.button("Analyze"):
    # .strip() so whitespace-only input is also rejected, not just "".
    if not text.strip():
        st.write("Please enter a text.")
    else:
        with st.spinner("Analyzing toxicity..."):
            if selected_model == "Olivernyu/finetuned_bert_base_uncased":
                toxicity_detector = load_model(selected_model)
                # top_k=2 -> the two highest-scoring toxicity categories.
                outputs = toxicity_detector(text, top_k=2)
                results = [(category[item['label']], item['score']) for item in outputs]
                # Table with the input text (or a portion of it), the two top
                # toxicity classes, and their probabilities. All columns are
                # one-element lists so the DataFrame shape is explicit.
                table_df = pd.DataFrame({
                    "Text (portion)": [text[:50]],
                    results[0][0]: [results[0][1]],
                    results[1][0]: [results[1][1]],
                })
                st.table(table_df)
            else:
                sentiment_pipeline = load_model(selected_model)
                result = sentiment_pipeline(text)
                st.write(f"Sentiment: {result[0]['label']} (confidence: {result[0]['score']:.2f})")
                # Celebrate confident positives; call out confident negatives.
                if result[0]['label'] in ['POSITIVE', 'LABEL_1'] and result[0]['score'] > 0.9:
                    st.balloons()
                elif result[0]['label'] in ['NEGATIVE', 'LABEL_0'] and result[0]['score'] > 0.9:
                    st.error("Hater detected.")
else:
    st.write("Enter a text and click 'Analyze' to perform toxicity analysis.")