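"""Streamlit app: multi-label toxicity detection and sentiment analysis with Hugging Face models."""
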
import streamlit as st
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline

# Load a fine-tuned model and its tokenizer separately.
# Note: not called anywhere below; load_model is used for all inference.
def load_finetune_model(model_name):
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    return tokenizer, model

# Build a text-classification pipeline for the selected model.
# Cached so the weights are downloaded and loaded only once per session (requires Streamlit >= 1.18).
@st.cache_resource
def load_model(model_name):
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    return pipeline("sentiment-analysis", tokenizer=tokenizer, model=model)

# Streamlit app
st.title("Multi-label Toxicity Detection App")
st.write("Enter a text and select the fine-tuned model to get the toxicity analysis.")

# Input text
default_text = "I will kill you if you do not give me my pop tarts."
text = st.text_input("Enter your text:", value=default_text)

# Map the fine-tuned toxicity model's raw output labels to readable category names.
category = {
    'LABEL_0': 'toxic',
    'LABEL_1': 'severe_toxic',
    'LABEL_2': 'obscene',
    'LABEL_3': 'threat',
    'LABEL_4': 'insult',
    'LABEL_5': 'identity_hate',
}


# Model selection
model_options = {
    "Olivernyu/finetuned_bert_base_uncased": {
        "description": "This model detects different types of toxicity like threats, obscenity, insults, and identity-based hate in text.",
    },
    "distilbert-base-uncased-finetuned-sst-2-english": {
        "labels": ["NEGATIVE", "POSITIVE"],
        "description": "This model classifies text into positive or negative sentiment. It is based on DistilBERT and fine-tuned on the Stanford Sentiment Treebank (SST-2) dataset.",
    },
    "textattack/bert-base-uncased-SST-2": {
        "labels": ["LABEL_0", "LABEL_1"],
        "description": "This model classifies text into positive(LABEL_1) or negative(LABEL_0) sentiment. It is based on BERT and fine-tuned on the Stanford Sentiment Treebank (SST-2) dataset.",
    },
    "cardiffnlp/twitter-roberta-base-sentiment": {
        "labels": ["LABEL_0", "LABEL_1", "LABEL_2"],
        "description": "This model classifies tweets into negative (LABEL_0), neutral(LABEL_1), or positive(LABEL_2) sentiment. It is based on RoBERTa and fine-tuned on a large dataset of tweets.",
    },
}
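# st.selectbox iterates over the dict, so the model names (keys) become the options.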
selected_model = st.selectbox("Choose a fine-tuned model:", model_options)

st.write("### Model Information")
st.write(f"**Description:** {model_options[selected_model]['description']}")

# Load the model and perform toxicity analysis
if st.button("Analyze"):
    if not text:
        st.write("Please enter a text.")
    else:
        with st.spinner("Analyzing toxicity..."):
            if selected_model == "Olivernyu/finetuned_bert_base_uncased":
                toxicity_detector = load_model(selected_model)
                # top_k=2 returns the two highest-scoring toxicity labels for the input.
                outputs = toxicity_detector(text, top_k=2)
                # Map raw labels (LABEL_0 ... LABEL_5) to readable category names.
                results = [(category[item['label']], item['score']) for item in outputs]

                # Show the input text (truncated) alongside the two highest-scoring
                # toxicity categories and their probabilities.
                table_data = {
                    "Text (portion)": [text[:50]],
                    results[0][0]: [results[0][1]],
                    results[1][0]: [results[1][1]],
                }
                table_df = pd.DataFrame(table_data)
                st.table(table_df)
            else:
                sentiment_pipeline = load_model(selected_model)
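                # The pipeline returns a list with one {'label': ..., 'score': ...} dict per input.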
                result = sentiment_pipeline(text)
                st.write(f"Sentiment: {result[0]['label']} (confidence: {result[0]['score']:.2f})")
                if result[0]['label'] in ['POSITIVE', 'LABEL_1'] and result[0]['score'] > 0.9:
                    st.balloons()
                elif result[0]['label'] in ['NEGATIVE', 'LABEL_0'] and result[0]['score'] > 0.9:
                    st.error("Hater detected.")
else:
    st.write("Enter a text and click 'Analyze' to perform toxicity analysis.")