svsaurav95 committed on
Commit
829eb6a
·
verified ·
1 Parent(s): 6e2e9b7

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +72 -0
app.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import pipeline
3
+ import re
4
+
5
# Sentiment classifier shared by every request; created once when the module
# is loaded so the model is not re-initialised per message.
sentiment_pipeline = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)
6
+
7
# Default guideline text pre-filled in the admin textbox. The moderation code
# looks for these phrases (case-insensitively) to decide which rules are on.
moderation_guidelines = """
- Allow positive messages
- Block cuss words
- Allow negative comments about individuals but block negative comments against a community
- Block words: Darren
"""

# Built-in profanity list, stored lowercase.
default_cuss_words = {
    "damn",
    "hell",
    "shit",
    "fuck",
    "ass",
    "bastard",
    "bitch",
    "bollocks",
    "bugger",
    "bullshit",
    "crap",
    "dammit",
    "douche",
    "dumbass",
    "faggot",
    "jackass",
    "jerk",
    "motherfucker",
    "piss",
    "prick",
    "slut",
    "son of a bitch",
    "twat",
    "wanker",
}

# Lowercase terms treated as a reference to a community rather than a person.
community_terms = {
    "religion",
    "race",
    "ethnicity",
    "group",
    "community",
    "gender",
    "china",
}
23
def extract_blocked_words(guidelines):
    """Return the admin-defined blocked words listed in the guidelines.

    Looks for the first ``block words:`` marker (case-insensitive) and reads
    the comma-separated entries that follow it on that line.

    Args:
        guidelines: Free-form guideline text entered by an admin.

    Returns:
        A set of lowercase entries with surrounding whitespace removed.
        Empty when the marker is missing or nothing is listed after it.
    """
    match = re.search(r"block words:\s*(.*)", guidelines.lower())
    if match is None:
        return set()

    # Strip each entry so "a, b" yields "b" rather than " b" (which could never
    # equal a word token), and drop blanks left by trailing or doubled commas.
    entries = (entry.strip() for entry in match.group(1).split(","))
    return {entry for entry in entries if entry}
27
+
28
def moderate_message(message, guidelines):
    """Decide whether a message is allowed under the given guidelines.

    Rules are switched on by phrases found (case-insensitively) in
    ``guidelines``. Checks run in this order and the first match wins:

    1. Profanity, when the guidelines contain "block cuss".
    2. Admin-listed words from the "Block words:" line.
    3. Positive sentiment, when the guidelines contain "allow positive".
    4. Negative sentiment that mentions a community term, when the guidelines
       contain "block negative comments against a community".
    5. Any other negative sentiment, when the guidelines contain
       "allow negative comments about individuals".

    Anything that matches none of the above is reported as allowed/neutral.

    Args:
        message: The user message to check.
        guidelines: Free-form guideline text entered by an admin.

    Returns:
        A human-readable verdict string for display in the UI.
    """
    rules = guidelines.lower()
    tokens = set(re.findall(r"\w+", message.lower()))

    # Word-list checks are cheap and decide the outcome regardless of
    # sentiment, so they run before the model is invoked.
    if "block cuss" in rules and tokens & default_cuss_words:
        return "❌ Message Blocked: Contains inappropriate language."

    if tokens & extract_blocked_words(guidelines):
        return "🚫 Message Blocked: Contains restricted words."

    # truncation=True asks the tokenizer to clip inputs that exceed the
    # model's maximum length instead of failing on very long messages.
    sentiment = sentiment_pipeline(message, truncation=True)[0]["label"]

    if sentiment == "POSITIVE" and "allow positive" in rules:
        return f"✅ Allowed (Positive): {message}"

    if sentiment == "NEGATIVE":
        # Compare whole words (plus a simple trailing-"s" plural) so that
        # "embrace" or "grace" is not mistaken for the term "race".
        singulars = {token[:-1] for token in tokens if token.endswith("s")}
        mentions_community = bool((tokens | singulars) & community_terms)

        if mentions_community and "block negative comments against a community" in rules:
            return "🚫 Message Blocked: Negative content targeting a community."
        if "allow negative comments about individuals" in rules:
            return f"⚠️ Allowed (Negative - Personal Attack): {message}"

    return f"✅ Allowed (Neutral): {message}"
59
+
60
# Build the moderation UI: an editable guidelines box, a message/result row,
# and a button that runs moderate_message on the current inputs.
with gr.Blocks() as demo:
    gr.Markdown("### 🛡️ AI-Powered Moderation System")

    guidelines_input = gr.Textbox(
        value=moderation_guidelines,
        label="Moderation Guidelines (Admins Can Update)",
        lines=4,
    )

    with gr.Row():
        msg_input = gr.Textbox(label="Enter Message")
        msg_output = gr.Textbox(label="Moderation Result", interactive=False)

    moderate_btn = gr.Button("Check Message")
    # The guidelines box is passed on every click, so edits take effect
    # on the next check.
    moderate_btn.click(
        moderate_message,
        inputs=[msg_input, guidelines_input],
        outputs=[msg_output],
    )

demo.launch()