# moderation-2 / app.py
# ifmain's picture
# Update app.py
# f9d050d verified
import json
import torch
from transformers import BertTokenizer, BertForSequenceClassification
import gradio as gr
# Hugging Face Hub identifier of the checkpoint used for moderation.
model_name = "ifmain/ModerationBERT-En-02"

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

tokenizer = BertTokenizer.from_pretrained(model_name)

# The classification head has 18 outputs, one per entry in `categories`.
model = BertForSequenceClassification.from_pretrained(
    model_name,
    num_labels=18,
)
model.to(device)
# Label names, in output order: predict_moderation pairs entry i of this list
# with score i of the model output, so the order must not be changed.
# The first 11 names use underscores; the last 7 use '-' and '/' spellings.
categories = [
    'harassment',
    'harassment_threatening',
    'hate',
    'hate_threatening',
    'self_harm',
    'self_harm_instructions',
    'self_harm_intent',
    'sexual',
    'sexual_minors',
    'violence',
    'violence_graphic',
    'self-harm',
    'sexual/minors',
    'hate/threatening',
    'violence/graphic',
    'self-harm/intent',
    'self-harm/instructions',
    'harassment/threatening',
]
def predict_moderation(text):
    """Score ``text`` against every moderation category.

    The text is tokenized into a fixed window of 128 tokens (longer input is
    truncated, shorter input is padded), passed through the module-level
    ``model`` on ``device``, and a sigmoid is applied to each logit
    independently, so several categories can score high at the same time.

    Args:
        text: The string to classify.

    Returns:
        A 2-tuple ``(category_scores, detected)``. ``category_scores`` maps
        each name in ``categories`` to its sigmoid score as a ``float``.
        ``detected`` is the string ``"True"`` when at least one score is
        above 0.5 and ``"False"`` otherwise.
    """
    # Call the tokenizer directly: ``encode_plus`` is deprecated in favour of
    # ``__call__``, which takes the same arguments for a single string.
    encoding = tokenizer(
        text,
        add_special_tokens=True,
        max_length=128,
        return_token_type_ids=False,
        padding='max_length',
        truncation=True,
        return_attention_mask=True,
        return_tensors='pt',
    )
    input_ids = encoding['input_ids'].to(device)
    attention_mask = encoding['attention_mask'].to(device)

    # Put the model in evaluation mode and skip gradient tracking: this is
    # inference only.
    model.eval()
    with torch.no_grad():
        outputs = model(input_ids, attention_mask=attention_mask)

    # Only one text was submitted, so take row 0 of the batch.
    probs = torch.sigmoid(outputs.logits)[0].cpu().numpy()

    # Scores come back in the same order as ``categories``.
    category_scores = {
        name: float(prob) for name, prob in zip(categories, probs)
    }
    detected = any(prob > 0.5 for prob in probs)
    return category_scores, str(detected)
# UI components: one text box in, two label widgets out. The outputs line up
# with the (category_scores, detected) pair returned by predict_moderation.
_text_input = gr.Textbox(label="Enter text")
_score_output = gr.Label(label="Ratings by category")
_verdict_output = gr.Label(label="Was a violation detected?")

iface = gr.Interface(
    fn=predict_moderation,
    inputs=_text_input,
    outputs=[_score_output, _verdict_output],
    title="Text moderation",
    description="Enter text to check it for content violations (ModerationBERT-En-02 model).",
)

# Start the web app when this script is executed.
iface.launch()