Spaces:

KavinduHansaka
/

Toxic_Comment_Classifier

Sleeping

App Files Files Community

KavinduHansaka committed on May 15

Commit

37d03fb

verified ·

1 Parent(s): 8d41ba9

Update app.py

Browse files

Files changed (1) hide show

app.py +57 -45

app.py CHANGED Viewed

@@ -3,69 +3,81 @@ import pandas as pd
 from detoxify import Detoxify
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import torch
-import numpy as np
 import io
-# Load Detoxify multilingual model
-tox_model = Detoxify('multilingual')
-# Load AI detector model
 ai_tokenizer = AutoTokenizer.from_pretrained("openai-community/roberta-base-openai-detector")
 ai_model = AutoModelForSequenceClassification.from_pretrained("openai-community/roberta-base-openai-detector")
 # Thresholds
 TOXICITY_THRESHOLD = 0.7
-AI_THRESHOLD = 0.5  # If >0.5, it's likely AI-generated
-def detect_ai_generated(text):
     with torch.no_grad():
         inputs = ai_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
         logits = ai_model(**inputs).logits
-        probs = torch.sigmoid(logits).squeeze().item()
-    return round(probs, 4)
-def process_input(file):
-    df = pd.read_csv(file.name)
-    if 'comment' not in df.columns:
-        return "CSV must contain a 'comment' column."
-    comments = df['comment'].astype(str).tolist()
-    tox_results = tox_model.predict(comments)
-    tox_df = pd.DataFrame(tox_results, index=comments).round(4)
-    # Format columns
-    tox_df.columns = [col.replace("_", " ").title().replace(" ", "_") for col in tox_df.columns]
-    tox_df.columns = [col.replace("_", " ") for col in tox_df.columns]
-    # Add warnings
-    tox_df["⚠️ Warning"] = tox_df.apply(lambda row: "⚠️ High Risk" if any(score > TOXICITY_THRESHOLD for score in row) else "✅ Safe", axis=1)
-    # Add AI detection
-    tox_df["🧪 AI Probability"] = [detect_ai_generated(c) for c in tox_df.index]
-    tox_df["🧪 AI Detection"] = tox_df["🧪 AI Probability"].apply(lambda x: "🤖 Likely AI" if x > AI_THRESHOLD else "🧍 Human")
-    # Store downloadable CSV
-    csv_data = tox_df.copy()
-    csv_data.insert(0, "Comment", tox_df.index)
-    csv_bytes = csv_data.to_csv(index=False).encode()
-    return tox_df, ("toxicity_report.csv", csv_bytes)
-# Gradio UI
-upload = gr.File(label="📥 Upload .CSV (Must contain 'comment' column)")
-output_table = gr.Dataframe(label="📊 Predictions (Multilingual + AI Detection)")
-download = gr.File(label="📤 Download Predictions")
-app = gr.Interface(
-    fn=process_input,
-    inputs=upload,
-    outputs=[output_table, download],
-    title="🌍 Toxic Comment Classifier + AI Text Detector",
-    description="""
-📥 Upload a .csv file with a 'comment' column.
-🔍 Each comment will be scored for toxicity (Multilingual model) and AI-generation probability (RoBERTa-based).
-📤 Download the full report as .csv.
-"""
-)
 if __name__ == "__main__":
     app.launch()

 from detoxify import Detoxify
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import torch
 import io
+# Load models
+# Both models are instantiated at module level, so they are loaded once when
+# this module is imported and then shared by every request.
+tox_model = Detoxify('multilingual')  # 🌍 Multilingual toxicity classifier
+# Tokenizer and sequence-classification model used by detect_ai().
 ai_tokenizer = AutoTokenizer.from_pretrained("openai-community/roberta-base-openai-detector")
 ai_model = AutoModelForSequenceClassification.from_pretrained("openai-community/roberta-base-openai-detector")
 # Thresholds
+# A comment is flagged "High Risk" when any toxicity score is strictly greater
+# than TOXICITY_THRESHOLD.
 TOXICITY_THRESHOLD = 0.7
+# A comment is labelled "Likely AI" when its AI probability is strictly
+# greater than AI_THRESHOLD.
+AI_THRESHOLD = 0.5
+def detect_ai(text):
+    """Estimate how likely *text* is to be AI-generated.
+
+    Runs the module-level ``ai_tokenizer`` / ``ai_model`` pair on ``text``
+    (with tokenizer truncation enabled) without tracking gradients.
+
+    Args:
+        text: The comment to score.
+
+    Returns:
+        The probability of the AI-generated class, rounded to 4 decimals.
+    """
     with torch.no_grad():
         inputs = ai_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
         logits = ai_model(**inputs).logits
+    if logits.shape[-1] == 1:
+        # Single-logit head: sigmoid yields the positive-class probability.
+        prob = torch.sigmoid(logits).reshape(-1)[0].item()
+    else:
+        # Multi-class head: ``.squeeze().item()`` would raise because the
+        # tensor holds one logit per class, so normalise with softmax and
+        # pick the class that represents machine-generated text.
+        label2id = getattr(ai_model.config, "label2id", None) or {}
+        # NOTE(review): falls back to index 0, assumed to be the "fake"
+        # (machine-generated) class - confirm against the checkpoint config.
+        fake_index = next(
+            (idx for label, idx in label2id.items() if str(label).lower() == "fake"),
+            0,
+        )
+        prob = torch.softmax(logits, dim=-1)[0, fake_index].item()
+    return round(prob, 4)
+def classify_comments(comment_list):
+    """Score each comment for toxicity and AI authorship.
+
+    Args:
+        comment_list: List of comment strings; they become the frame's index.
+
+    Returns:
+        A DataFrame indexed by comment holding the rounded toxicity scores
+        (title-cased column names), a warning label, the AI probability and
+        the AI-detection label.
+    """
+    scores = tox_model.predict(comment_list)
+    table = pd.DataFrame(scores, index=comment_list).round(4)
+    # e.g. "severe_toxicity" -> "Severe Toxicity"
+    table.columns = [name.replace("_", " ").title() for name in table.columns]
+    # Evaluated before the AI columns exist, so only toxicity scores count.
+    flagged = (table > TOXICITY_THRESHOLD).any(axis=1)
+    table["⚠️ Warning"] = ["⚠️ High Risk" if hit else "✅ Safe" for hit in flagged]
+    ai_scores = [detect_ai(comment) for comment in table.index]
+    table["🧪 AI Probability"] = ai_scores
+    table["🧪 AI Detection"] = [
+        "🤖 Likely AI" if score > AI_THRESHOLD else "🧍 Human" for score in ai_scores
+    ]
+    return table
+def _build_outputs(scored):
+    """Convert a scored frame into the ``(table, csv_path)`` pair for the UI.
+
+    The comments live in the index of ``scored``; they are copied into a
+    leading "Comment" column so they appear in both the table and the CSV.
+    The CSV is written to a temporary file because a file output component
+    is given a path on disk rather than raw bytes.
+    """
+    import tempfile  # local import keeps this block self-contained
+
+    report = scored.copy()
+    report.insert(0, "Comment", scored.index)
+    report = report.reset_index(drop=True)
+    with tempfile.NamedTemporaryFile(
+        mode="w",
+        suffix=".csv",
+        prefix="toxicity_predictions_",
+        delete=False,
+        encoding="utf-8",
+        newline="",
+    ) as handle:
+        report.to_csv(handle, index=False)
+    return report, handle.name
+
+
+def classify_from_textbox(text_input):
+    """Classify comments pasted into the textbox, one comment per line.
+
+    Args:
+        text_input: Raw textbox content; blank lines are ignored.
+
+    Returns:
+        ``(table, csv_path)`` - the prediction frame and the path of the
+        downloadable CSV report.
+
+    Raises:
+        gr.Error: If no non-blank comment was supplied.
+    """
+    comment_list = [c.strip() for c in (text_input or "").split('\n') if c.strip()]
+    if not comment_list:
+        # Raise instead of returning a message: the first output is a table
+        # component, not a text component.
+        raise gr.Error("Please enter at least one comment.")
+    return _build_outputs(classify_comments(comment_list))
+
+
+def classify_from_csv(file_obj):
+    """Classify the 'comment' column of an uploaded CSV file.
+
+    Args:
+        file_obj: The uploaded file, either an object exposing ``.name`` or a
+            plain path string.
+
+    Returns:
+        ``(table, csv_path)`` - the prediction frame and the path of the
+        downloadable CSV report.
+
+    Raises:
+        gr.Error: If no file was uploaded, the 'comment' column is missing,
+            or the column holds no values.
+    """
+    if file_obj is None:
+        raise gr.Error("Please upload a CSV file.")
+    path = getattr(file_obj, "name", file_obj)
+    df = pd.read_csv(path)
+    if 'comment' not in df.columns:
+        raise gr.Error("CSV must contain a 'comment' column.")
+    # Drop missing cells so they are not scored as the literal string "nan".
+    comment_list = df['comment'].dropna().astype(str).tolist()
+    if not comment_list:
+        raise gr.Error("The 'comment' column is empty.")
+    return _build_outputs(classify_comments(comment_list))
+# Gradio Interface
+# Components are created inside the Blocks/Tab contexts so that they are
+# actually rendered; each tab gets its own output components because one
+# component instance can only be placed in the layout once.
+with gr.Blocks(title="Toxicity & AI Comment Detector") as app:
+    gr.Markdown("## 🌍 Toxic Comment & AI Detector\nDetects multilingual toxicity and whether the text is AI-generated.")
+    with gr.Tab("📝 Paste Text"):
+        text_input = gr.Textbox(lines=8, label="💬 Paste Comments (one per line)")
+        btn1 = gr.Button("Analyze Text Comments")
+        output1 = gr.Dataframe(label="📊 Predictions")
+        download1 = gr.File(label="📤 Download CSV")
+    with gr.Tab("📁 Upload CSV"):
+        csv_input = gr.File(label="📥 Or Upload .CSV with 'comment' column")
+        btn2 = gr.Button("Analyze CSV File")
+        output2 = gr.Dataframe(label="📊 Predictions")
+        download2 = gr.File(label="📤 Download CSV")
+    btn1.click(fn=classify_from_textbox, inputs=text_input, outputs=[output1, download1])
+    btn2.click(fn=classify_from_csv, inputs=csv_input, outputs=[output2, download2])
 if __name__ == "__main__":
     app.launch()