# HyperX-Sentience's picture
# Update app.py
# 236e362 verified
# raw
# history blame
# 2.03 kB
import os

# Install the third-party packages with a shell `pip` call at startup. This
# runs before the third-party imports below, so its position in the import
# block matters. The exit status returned by os.system is not checked.
# NOTE(review): pandas is imported below but is not in this install list --
# presumably it arrives as a transitive dependency; confirm, or declare the
# dependencies in a requirements file instead.
os.system("pip install torch transformers gradio matplotlib")

import torch
import gradio as gr
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Reads torch's current thread count and writes the same value back.
# NOTE(review): as written this appears to be a no-op -- confirm whether a
# specific thread count was intended.
torch.set_num_threads(torch.get_num_threads())
# Hugging Face Hub repository holding the trained classifier and its tokenizer
model_path = "HyperX-Sentience/RogueBERT-Toxicity-85K"

# Fetch the sequence-classification model, then the matching tokenizer
model = AutoModelForSequenceClassification.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Prefer the GPU when CUDA is usable, otherwise stay on the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
model.to(device)

# Toxicity category names; predict_toxicity pairs entry i with model output i
labels = [
    "toxic",
    "severe_toxic",
    "obscene",
    "threat",
    "insult",
    "identity_hate",
]
def predict_toxicity(comment):
    """Score a comment against every toxicity category.

    The text is tokenized (truncated and padded to 128 tokens), moved to the
    model's device, and run through the classifier with gradient tracking
    disabled. A sigmoid is applied to each logit independently.

    Args:
        comment: The text to classify.

    Returns:
        A dict mapping each name in ``labels`` to its score as a Python
        float.
    """
    encoded = tokenizer(
        comment,
        truncation=True,
        padding="max_length",
        max_length=128,
        return_tensors="pt",
    )

    # The input tensors must live on the same device as the model.
    model_inputs = {}
    for name, tensor in encoded.items():
        model_inputs[name] = tensor.to(device)

    with torch.no_grad():
        logits = model(**model_inputs).logits

    # Keep row 0 of the batch as a NumPy vector of per-category scores.
    scores = torch.sigmoid(logits).cpu().numpy()[0]
    return {label: float(scores[idx]) for idx, label in enumerate(labels)}
def format_toxicity_data(comment):
    """Return the toxicity scores of ``comment`` as a two-column DataFrame.

    Args:
        comment: The text to classify; passed straight to
            ``predict_toxicity``.

    Returns:
        A ``pandas.DataFrame`` with a "Category" column (label names) and a
        "Score" column (the matching scores), one row per category.
    """
    category_scores = predict_toxicity(comment)
    categories = list(category_scores)
    values = list(category_scores.values())
    return pd.DataFrame({"Category": categories, "Score": values})
# Gradio interface: one text box in, one bar plot out. The plot reads the
# "Category" and "Score" columns of the DataFrame returned by
# format_toxicity_data.
demo = gr.Interface(
    fn=format_toxicity_data,
    inputs=gr.Textbox(label="Enter a comment:"),
    outputs=gr.BarPlot(
        value=None,
        x="Category",
        y="Score",
        title="Toxicity Analysis",
        # Fixed axis range so bars are comparable between submissions.
        y_lim=[0, 1],
        # `color` is deliberately not passed: BarPlot treats it as the name
        # of a DataFrame column used to colour series, not as a colour value,
        # and the frame has no "blue" column.
        label="Toxicity Scores",
        interactive=False,
    ),
    title="Toxicity Detection with RogueBERT",
    description="Enter a comment to analyze its toxicity levels. The results will be displayed as a modern bar chart.",
)

# Start the web server for the demo.
demo.launch()