Spaces:
Sleeping
Sleeping
File size: 3,369 Bytes
829eb6a b064020 829eb6a b064020 829eb6a b064020 829eb6a b064020 829eb6a b064020 829eb6a b064020 829eb6a b064020 829eb6a b064020 829eb6a b064020 829eb6a b064020 829eb6a b064020 829eb6a b064020 829eb6a b064020 829eb6a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 |
import gradio as gr
from transformers import pipeline
import re
# Sentiment classifier; moderate_message compares its top label against
# "POSITIVE" / "NEGATIVE".
sentiment_pipeline = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)

# Named-entity recogniser; moderate_message reads the "entity_group" field of
# each result to detect person names ("PER").
ner_pipeline = pipeline(
    "ner",
    model="dbmdz/bert-large-cased-finetuned-conll03-english",
    aggregation_strategy="simple",
)
# Default guideline text shown to admins in the UI. moderate_message looks for
# these phrases (case-insensitively) to decide which rules are switched on.
moderation_guidelines = """
- Allow positive messages
- Block cuss words
- Allow negative comments about individuals but block negative comments against a community
- Block personal names
"""

# Built-in profanity list, compared against the lower-cased word tokens of a
# message. NOTE(review): the multi-word entry cannot equal a single token, so
# it only takes effect through its last word, which is also listed on its own.
default_cuss_words = {
    "ass",
    "bastard",
    "bitch",
    "bollocks",
    "bugger",
    "bullshit",
    "crap",
    "damn",
    "dammit",
    "douche",
    "dumbass",
    "faggot",
    "fuck",
    "hell",
    "jackass",
    "jerk",
    "motherfucker",
    "piss",
    "prick",
    "shit",
    "slut",
    "son of a bitch",
    "twat",
    "wanker",
}

# Terms whose presence in a negative message marks it as targeting a community.
community_terms = {
    "community",
    "ethnicity",
    "gender",
    "group",
    "race",
    "religion",
}
def extract_blocked_words(guidelines):
    """Extract the admin-defined blocked words from the moderation guidelines.

    Finds the first ``block words:`` directive (case-insensitive) and splits
    the remainder of that line on commas.

    Args:
        guidelines: Free-form guideline text, e.g. ``"Block words: foo, bar"``.
            ``None`` is treated like an empty string.

    Returns:
        A set of lower-cased, whitespace-stripped entries. Empty when the
        directive is missing or lists nothing.
    """
    text = (guidelines or "").lower()
    # Only spaces/tabs may follow the colon. A general whitespace match would
    # cross the line break after an empty directive and capture the *next*
    # guideline line as if it were a blocked word.
    match = re.search(r"block words:[ \t]*(.*)", text)
    if match is None:
        return set()
    return {word.strip() for word in match.group(1).split(",") if word.strip()}
def moderate_message(message, guidelines):
    """Moderate a message using keyword, named-entity and sentiment rules.

    Rules are switched on by phrases found (case-insensitively) in
    ``guidelines`` and are applied in this order; the first hit wins:

    1. ``block cuss``: block messages containing a default cuss word.
    2. ``block words: a, b``: block messages containing a listed word.
    3. ``block personal names``: block messages in which the NER model
       reports a ``PER`` entity.
    4. ``allow positive``: label POSITIVE messages as allowed.
    5. ``block negative comments against a community``: block NEGATIVE
       messages that mention a community term; otherwise
       ``allow negative comments about individuals`` lets NEGATIVE messages
       through with a warning label.

    Any message not caught by a rule above is returned as "Allowed (Neutral)",
    including POSITIVE/NEGATIVE messages whose matching rule is not enabled.

    Args:
        message: The user message to check.
        guidelines: Free-form moderation guidelines text.

    Returns:
        A human-readable verdict string.
    """
    rules = guidelines.lower()
    lowered_message = message.lower()
    words = set(re.findall(r"\w+", lowered_message))

    # 1. Block cuss words
    if "block cuss" in rules and words & default_cuss_words:
        return "❌ Message Blocked: Contains inappropriate language."

    # 2. Block dynamically defined words
    if words & extract_blocked_words(guidelines):
        return "🚫 Message Blocked: Contains restricted words."

    # 3. Block personal names dynamically
    if "block personal names" in rules:
        if any(entity["entity_group"] == "PER" for entity in ner_pipeline(message)):
            return "🚫 Message Blocked: Contains personal names."

    # Sentiment is only needed once the message survived every blocking rule
    # above, so the model inference is skipped for messages already rejected.
    sentiment = sentiment_pipeline(message)[0]["label"]

    if sentiment == "POSITIVE" and "allow positive" in rules:
        return f"✅ Allowed (Positive): {message}"

    if sentiment == "NEGATIVE":
        # NOTE(review): this is a substring test, so "race" also matches
        # "bracelet"; kept as-is because it also catches plurals ("groups").
        mentions_community = any(term in lowered_message for term in community_terms)
        if mentions_community and "block negative comments against a community" in rules:
            return "🚫 Message Blocked: Negative content targeting a community."
        if "allow negative comments about individuals" in rules:
            return f"⚠️ Allowed (Negative - Personal Attack): {message}"

    return f"✅ Allowed (Neutral): {message}"
# Gradio UI: admins edit the guidelines text, users enter a message, and the
# button runs moderate_message(message, guidelines) and shows the verdict.
with gr.Blocks() as demo:
    gr.Markdown("### 🛡️ AI-Powered Moderation System")
    guidelines_input = gr.Textbox(
        value=moderation_guidelines,
        label="Moderation Guidelines (Admins Can Update)",
        lines=6,
    )
    with gr.Row():
        msg_input = gr.Textbox(label="Enter Message")
        msg_output = gr.Textbox(label="Moderation Result", interactive=False)
    moderate_btn = gr.Button("Check Message")
    # Event listeners must be registered inside the Blocks context.
    moderate_btn.click(
        moderate_message,
        inputs=[msg_input, guidelines_input],
        outputs=[msg_output],
    )

demo.launch()
|