KavinduHansaka committed on
Commit debc16e · verified · 1 Parent(s): 37d03fb

Update app.py

Files changed (1)
app.py +27 -40
app.py CHANGED
@@ -3,14 +3,12 @@ import pandas as pd
 from detoxify import Detoxify
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import torch
-import io
 
 # Load models
-tox_model = Detoxify('multilingual') # 🌍 Multilingual toxicity classifier
+tox_model = Detoxify('multilingual')
 ai_tokenizer = AutoTokenizer.from_pretrained("openai-community/roberta-base-openai-detector")
 ai_model = AutoModelForSequenceClassification.from_pretrained("openai-community/roberta-base-openai-detector")
 
-# Thresholds
 TOXICITY_THRESHOLD = 0.7
 AI_THRESHOLD = 0.5
 
@@ -24,60 +22,49 @@ def detect_ai(text):
 def classify_comments(comment_list):
     results = tox_model.predict(comment_list)
     df = pd.DataFrame(results, index=comment_list).round(4)
-
-    # Capitalize columns
     df.columns = [col.replace("_", " ").title().replace(" ", "_") for col in df.columns]
     df.columns = [col.replace("_", " ") for col in df.columns]
-
-    # Add warning & AI detection
     df["⚠️ Warning"] = df.apply(lambda row: "⚠️ High Risk" if any(score > TOXICITY_THRESHOLD for score in row) else "✅ Safe", axis=1)
     df["🧪 AI Probability"] = [detect_ai(c) for c in df.index]
     df["🧪 AI Detection"] = df["🧪 AI Probability"].apply(lambda x: "🤖 Likely AI" if x > AI_THRESHOLD else "🧍 Human")
-
     return df
 
-def classify_from_textbox(text_input):
-    comment_list = [c.strip() for c in text_input.strip().split('\n') if c.strip()]
+def run_classification(text_input, csv_file):
+    comment_list = []
+
+    # From text input
+    if text_input.strip():
+        comment_list += [c.strip() for c in text_input.strip().split('\n') if c.strip()]
+
+    # From CSV
+    if csv_file:
+        df = pd.read_csv(csv_file.name)
+        if 'comment' not in df.columns:
+            return "CSV must contain a 'comment' column.", None
+        comment_list += df['comment'].astype(str).tolist()
+
     if not comment_list:
-        return "Please enter at least one comment.", None
-    df = classify_comments(comment_list)
-    csv_data = df.copy()
-    csv_data.insert(0, "Comment", df.index)
-    return df, ("toxicity_predictions.csv", csv_data.to_csv(index=False).encode())
+        return "Please provide comments via text or CSV.", None
 
-def classify_from_csv(file_obj):
-    df = pd.read_csv(file_obj.name)
-    if 'comment' not in df.columns:
-        return "CSV must contain a 'comment' column.", None
-    comment_list = df['comment'].astype(str).tolist()
     df = classify_comments(comment_list)
     csv_data = df.copy()
     csv_data.insert(0, "Comment", df.index)
     return df, ("toxicity_predictions.csv", csv_data.to_csv(index=False).encode())
 
-# Gradio Interface
-text_input = gr.Textbox(lines=8, label="💬 Paste Comments (one per line)")
-csv_input = gr.File(label="📥 Or Upload .CSV with 'comment' column")
-output_table = gr.Dataframe(label="📊 Predictions")
-download_button = gr.File(label="📤 Download CSV")
+# UI layout
+with gr.Blocks(title="🌍 Toxic Comment & AI Detector") as app:
+    gr.Markdown("## 🌍 Toxic Comment & AI Detector")
+    gr.Markdown("Detects multilingual toxicity and whether the comment is AI-generated. Paste text or upload CSV.")
 
-with gr.Blocks(title="Toxicity & AI Comment Detector") as app:
-    gr.Markdown("## 🌍 Toxic Comment & AI Detector\nDetects multilingual toxicity and whether the text is AI-generated.")
-
-    with gr.Tab("📝 Paste Text"):
-        text = text_input
-        btn1 = gr.Button("Analyze Text Comments")
-        output1 = output_table
-        download1 = download_button
+    with gr.Row():
+        text_input = gr.Textbox(lines=8, label="💬 Enter Comments (one per line)")
+        file_input = gr.File(label="📥 Upload CSV (with 'comment' column)")
 
-    with gr.Tab("📁 Upload CSV"):
-        csv = csv_input
-        btn2 = gr.Button("Analyze CSV File")
-        output2 = output_table
-        download2 = download_button
+    submit_btn = gr.Button("🔍 Analyze Comments")
+    output_table = gr.Dataframe(label="📊 Results")
+    download_btn = gr.File(label="📤 Download CSV")
 
-    btn1.click(fn=classify_from_textbox, inputs=text, outputs=[output1, download1])
-    btn2.click(fn=classify_from_csv, inputs=csv, outputs=[output2, download2])
+    submit_btn.click(fn=run_classification, inputs=[text_input, file_input], outputs=[output_table, download_btn])
 
 if __name__ == "__main__":
     app.launch()
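The second hunk's header shows that these functions sit below a detect_ai(text) helper whose body is outside the diff context. For orientation only, here is a minimal sketch of what such a helper plausibly looks like given the loaded openai-community/roberta-base-openai-detector model; this is an assumption, not the commit's actual code, and the class-index lookup assumes the model config names its labels Fake/Real.

import torch

def detect_ai(text):
    # Hypothetical reconstruction: score one comment with the RoBERTa detector
    # already loaded in app.py (ai_tokenizer, ai_model).
    inputs = ai_tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():
        logits = ai_model(**inputs).logits
    probs = torch.softmax(logits, dim=-1)[0]
    # Resolve the "Fake" (AI-generated) class index from the model config rather
    # than hard-coding it; assumes id2label contains a label named "Fake".
    fake_id = next(i for i, name in ai_model.config.id2label.items() if name.lower() == "fake")
    return round(probs[fake_id].item(), 4)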
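Since the two tab-specific handlers are now merged into a single run_classification(text_input, csv_file) entry point, the pipeline can be sanity-checked without the UI. A hypothetical smoke test, with invented sample comments; CSV input would instead need a file whose header row contains a 'comment' column.

# Text-only input; passing None for csv_file exercises the `if csv_file:` guard.
df, download = run_classification("Have a wonderful day!\nNobody likes you.", None)
print(df)           # one row per comment: Detoxify scores plus the ⚠️/🧪 columns
print(download[0])  # "toxicity_predictions.csv"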