# Moderation-Demo / app.py
# DarwinAnim8or's picture
# Create app.py
# 5841603
# Import gradio and transformers libraries
import gradio as gr
import torch  # used to disable autograd during inference
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# Checkpoint ids for the hate-speech and offensive-speech classifiers
# (small DeBERTa models, per the original author's note). Each id is used
# to load both the model and its matching tokenizer.
_HATE_CHECKPOINT = "KoalaAI/HateSpeechDetector"
_OFFENSIVE_CHECKPOINT = "KoalaAI/OffensiveSpeechDetector"

hate_model = AutoModelForSequenceClassification.from_pretrained(_HATE_CHECKPOINT)
hate_tokenizer = AutoTokenizer.from_pretrained(_HATE_CHECKPOINT)
offensive_model = AutoModelForSequenceClassification.from_pretrained(_OFFENSIVE_CHECKPOINT)
offensive_tokenizer = AutoTokenizer.from_pretrained(_OFFENSIVE_CHECKPOINT)
def _label_scores(text, tokenizer, model):
    """Return a ``{label: probability}`` dict for *text* from one classifier.

    Args:
        text: The input string to classify.
        tokenizer: Tokenizer paired with ``model``.
        model: Sequence-classification model exposing ``config.id2label``.

    Returns:
        A dict mapping each label name in ``model.config.id2label`` to its
        softmax probability, rounded to 4 decimal places.
    """
    # truncation=True clips over-long input to the tokenizer's configured
    # maximum length (when the checkpoint defines one) rather than passing
    # arbitrarily long user text to the model.
    encoded = tokenizer(text, return_tensors="pt", truncation=True)
    # Inference only: no_grad avoids recording an autograd graph per request.
    with torch.no_grad():
        logits = model(**encoded).logits
    # One input text -> one row of logits; take that row's probabilities.
    probabilities = logits.softmax(dim=1)[0].tolist()
    id2label = model.config.id2label
    return {id2label[i]: round(p, 4) for i, p in enumerate(probabilities)}


def get_scores(text):
    """Score *text* with the hate-speech and offensive-speech classifiers.

    Args:
        text: The input string to classify.

    Returns:
        A dict with two keys, ``"Hate speech"`` and ``"Offensive speech"``,
        each mapping to a ``{label: probability}`` dict for the corresponding
        model, with probabilities rounded to 4 decimal places.
    """
    return {
        "Hate speech": _label_scores(text, hate_tokenizer, hate_model),
        "Offensive speech": _label_scores(text, offensive_tokenizer, offensive_model),
    }
# Build the Gradio UI: a single text input passed to get_scores, with the
# returned nested score dictionary rendered as JSON.
iface = gr.Interface(
    fn=get_scores,
    inputs="text",
    outputs="json",
)
# Start serving the demo.
iface.launch()