lg3394 committed
Commit 3298b12 · verified · 1 Parent(s): cd880dc

Update app.py

Files changed (1):
app.py +5 -2
app.py CHANGED
@@ -102,10 +102,13 @@ def moderate_text(user_text):
 
     # Toxic BERT Moderation (Hugging Face Model)
     toxic_result = toxic_bert(user_text)
-    toxic_classification = "Blocked" if toxic_result[0]['label'] == 'LABEL_1' else "Allowed"  # Toxic BERT classifies as "LABEL_1" for toxic
+    toxic_classification = "Blocked" if toxic_result[0]['label'] == 'LABEL_1' else "Allowed"
+
+    # Adjust the threshold for toxic classification if necessary (e.g., block if score > 0.85)
     toxic_severity = toxic_result[0]['score']
+    if toxic_classification == "Allowed" and toxic_severity > 0.85:  # Set your threshold here
+        toxic_classification = "Blocked"
 
-    # Explanation of severity score
     toxic_explanation = f"Toxic BERT classification: {toxic_classification}, Confidence: {toxic_severity:.2f}"
 
     return openai_moderation_result, anthropic_moderation_result, azure_moderation_result, toxic_explanation
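
For reference, the changed Toxic BERT check can be exercised on its own with the sketch below. It assumes toxic_bert is a Hugging Face text-classification pipeline whose toxic class is reported as 'LABEL_1', as the diff implies; the checkpoint name and the classify_toxicity wrapper are placeholders for illustration, not part of the commit.

# Standalone sketch of the thresholded Toxic BERT check introduced in this commit.
# Assumptions (not from the commit): the model checkpoint name is a placeholder,
# and the classify_toxicity() wrapper exists only for this illustration.
from transformers import pipeline

toxic_bert = pipeline("text-classification", model="path/to/toxic-bert-checkpoint")  # placeholder checkpoint

def classify_toxicity(user_text, threshold=0.85):
    result = toxic_bert(user_text)[0]
    # Toxic BERT reports the toxic class as 'LABEL_1'
    classification = "Blocked" if result['label'] == 'LABEL_1' else "Allowed"
    severity = result['score']
    # New in this commit: override an "Allowed" verdict when the score exceeds the threshold
    if classification == "Allowed" and severity > threshold:
        classification = "Blocked"
    return classification, severity

print(classify_toxicity("example input"))

The 0.85 threshold mirrors the value hard-coded in the diff and can be tuned per deployment.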