Update app.py
app.py
CHANGED
```diff
@@ -3,14 +3,12 @@ import pandas as pd
 from detoxify import Detoxify
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import torch
-import io
 
 # Load models
-tox_model = Detoxify('multilingual')
+tox_model = Detoxify('multilingual')
 ai_tokenizer = AutoTokenizer.from_pretrained("openai-community/roberta-base-openai-detector")
 ai_model = AutoModelForSequenceClassification.from_pretrained("openai-community/roberta-base-openai-detector")
 
-# Thresholds
 TOXICITY_THRESHOLD = 0.7
 AI_THRESHOLD = 0.5
 
```
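The second hunk below is anchored at `def detect_ai(text):`, whose body sits between the two hunks and is unchanged, so the diff never shows it. For orientation, here is a minimal sketch of what such a helper typically looks like with the RoBERTa detector loaded above, reusing the `ai_tokenizer` and `ai_model` globals from the file. This is a hypothetical reconstruction, not the Space's actual code; in particular, the class index used for the AI label should be confirmed against `ai_model.config.id2label`.

```python
import torch

def detect_ai(text):
    # Tokenize with truncation so long comments fit the model's 512-token limit
    inputs = ai_tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():
        logits = ai_model(**inputs).logits
    probs = torch.softmax(logits, dim=-1)[0]
    # Assumption: index 0 is the machine-generated ("Fake") class;
    # verify with ai_model.config.id2label before relying on it
    return round(probs[0].item(), 4)
```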
```diff
@@ -24,60 +22,49 @@ def detect_ai(text):
 def classify_comments(comment_list):
     results = tox_model.predict(comment_list)
     df = pd.DataFrame(results, index=comment_list).round(4)
-
-    # Capitalize columns
     df.columns = [col.replace("_", " ").title().replace(" ", "_") for col in df.columns]
     df.columns = [col.replace("_", " ") for col in df.columns]
-
-    # Add warning & AI detection
     df["⚠️ Warning"] = df.apply(lambda row: "⚠️ High Risk" if any(score > TOXICITY_THRESHOLD for score in row) else "✅ Safe", axis=1)
     df["🧪 AI Probability"] = [detect_ai(c) for c in df.index]
     df["🧪 AI Detection"] = df["🧪 AI Probability"].apply(lambda x: "🤖 Likely AI" if x > AI_THRESHOLD else "🧍 Human")
-
     return df
 
-def …
-    comment_list = […
+def run_classification(text_input, csv_file):
+    comment_list = []
+
+    # From text input
+    if text_input.strip():
+        comment_list += [c.strip() for c in text_input.strip().split('\n') if c.strip()]
+
+    # From CSV
+    if csv_file:
+        df = pd.read_csv(csv_file.name)
+        if 'comment' not in df.columns:
+            return "CSV must contain a 'comment' column.", None
+        comment_list += df['comment'].astype(str).tolist()
+
     if not comment_list:
-        return "Please …
-    df = classify_comments(comment_list)
-    csv_data = df.copy()
-    csv_data.insert(0, "Comment", df.index)
-    return df, ("toxicity_predictions.csv", csv_data.to_csv(index=False).encode())
+        return "Please provide comments via text or CSV.", None
 
-def classify_from_csv(file_obj):
-    df = pd.read_csv(file_obj.name)
-    if 'comment' not in df.columns:
-        return "CSV must contain a 'comment' column.", None
-    comment_list = df['comment'].astype(str).tolist()
     df = classify_comments(comment_list)
     csv_data = df.copy()
     csv_data.insert(0, "Comment", df.index)
     return df, ("toxicity_predictions.csv", csv_data.to_csv(index=False).encode())
 
-# …
-…
-…
-…
-download_button = gr.File(label="📤 Download CSV")
+# UI layout
+with gr.Blocks(title="🌍 Toxic Comment & AI Detector") as app:
+    gr.Markdown("## 🌍 Toxic Comment & AI Detector")
+    gr.Markdown("Detects multilingual toxicity and whether the comment is AI-generated. Paste text or upload CSV.")
 
-with gr.…
-…
-…
-    with gr.Tab("📝 Paste Text"):
-        text = text_input
-        btn1 = gr.Button("Analyze Text Comments")
-        output1 = output_table
-        download1 = download_button
+    with gr.Row():
+        text_input = gr.Textbox(lines=8, label="💬 Enter Comments (one per line)")
+        file_input = gr.File(label="📥 Upload CSV (with 'comment' column)")
 
-…
-…
-…
-        output2 = output_table
-        download2 = download_button
+    submit_btn = gr.Button("🔍 Analyze Comments")
+    output_table = gr.Dataframe(label="📊 Results")
+    download_btn = gr.File(label="📤 Download CSV")
 
-…
-        btn2.click(fn=classify_from_csv, inputs=csv, outputs=[output2, download2])
+    submit_btn.click(fn=run_classification, inputs=[text_input, file_input], outputs=[output_table, download_btn])
 
 if __name__ == "__main__":
     app.launch()
```
|