"""Gradio demo that scores text with the ModerationBERT-En-02 classifier.

Loads the tokenizer and model once at import time, exposes
``predict_moderation`` as the inference entry point, and launches a small
web UI that shows the per-category scores and an overall verdict.
"""

import json
from typing import Dict, Tuple

import gradio as gr
import torch
from transformers import BertForSequenceClassification, BertTokenizer

model_name = "ifmain/ModerationBERT-En-02"

# Maximum number of tokens fed to the model; longer inputs are truncated and
# shorter ones are padded up to this length.
MAX_LENGTH = 128

# A category counts as "detected" when its sigmoid score exceeds this value.
DETECTION_THRESHOLD = 0.5

# Label names in the order of the model's output logits. The list is kept
# exactly as the script defines it (both the underscore and the slash/hyphen
# spellings appear), because index i here names logit i.
categories = [
    'harassment',
    'harassment_threatening',
    'hate',
    'hate_threatening',
    'self_harm',
    'self_harm_instructions',
    'self_harm_intent',
    'sexual',
    'sexual_minors',
    'violence',
    'violence_graphic',
    'self-harm',
    'sexual/minors',
    'hate/threatening',
    'violence/graphic',
    'self-harm/intent',
    'self-harm/instructions',
    'harassment/threatening',
]

tokenizer = BertTokenizer.from_pretrained(model_name)
# Derive the head size from the label list so the two cannot drift apart.
model = BertForSequenceClassification.from_pretrained(
    model_name,
    num_labels=len(categories),
)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
# Inference only: switch to eval mode once here instead of on every request.
model.eval()


def predict_moderation(text: str) -> Tuple[Dict[str, float], str]:
    """Score *text* against every moderation category.

    Args:
        text: The text to classify.

    Returns:
        A pair ``(category_scores, detected)`` where ``category_scores`` maps
        each name in ``categories`` to its sigmoid score as a Python float,
        and ``detected`` is the string ``"True"`` if any score exceeds
        ``DETECTION_THRESHOLD`` and ``"False"`` otherwise.
    """
    encoding = tokenizer(
        text,
        add_special_tokens=True,
        max_length=MAX_LENGTH,
        return_token_type_ids=False,
        padding='max_length',
        truncation=True,
        return_attention_mask=True,
        return_tensors='pt',
    )
    input_ids = encoding['input_ids'].to(device)
    attention_mask = encoding['attention_mask'].to(device)

    with torch.no_grad():
        outputs = model(input_ids, attention_mask=attention_mask)

    # Independent sigmoid per logit; [0] selects the single item in the batch.
    probs = torch.sigmoid(outputs.logits)[0].cpu().numpy()

    category_scores = {
        name: float(prob) for name, prob in zip(categories, probs)
    }
    detected = bool((probs > DETECTION_THRESHOLD).any())
    # The verdict is returned as text because it is rendered in a gr.Label.
    return category_scores, str(detected)


iface = gr.Interface(
    fn=predict_moderation,
    inputs=gr.Textbox(label="Enter text"),
    outputs=[
        gr.Label(label="Ratings by category"),
        gr.Label(label="Was a violation detected?"),
    ],
    title="Text moderation",
    description="Enter text to check it for content violations (ModerationBERT-En-02 model).",
)

# Launched at module level, as in the original script.
iface.launch()