lg3394 committed
Commit 3298b12 · verified · 1 Parent(s): cd880dc

Update app.py

Files changed (1):
app.py +5 -2
app.py CHANGED
@@ -102,10 +102,13 @@ def moderate_text(user_text):
 
     # Toxic BERT Moderation (Hugging Face Model)
     toxic_result = toxic_bert(user_text)
-    toxic_classification = "Blocked" if toxic_result[0]['label'] == 'LABEL_1' else "Allowed"  # Toxic BERT classifies as "LABEL_1" for toxic
+    toxic_classification = "Blocked" if toxic_result[0]['label'] == 'LABEL_1' else "Allowed"
+
+    # Adjust the threshold for toxic classification if necessary (e.g., block if score > 0.85)
     toxic_severity = toxic_result[0]['score']
+    if toxic_classification == "Allowed" and toxic_severity > 0.85:  # Set your threshold here
+        toxic_classification = "Blocked"
 
-    # Explanation of severity score
     toxic_explanation = f"Toxic BERT classification: {toxic_classification}, Confidence: {toxic_severity:.2f}"
 
     return openai_moderation_result, anthropic_moderation_result, azure_moderation_result, toxic_explanation
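
For reference, the changed Toxic BERT check can be exercised on its own with the sketch below. It assumes toxic_bert is a Hugging Face text-classification pipeline whose toxic class is reported as 'LABEL_1', as the diff implies; the checkpoint name and the classify_toxicity wrapper are placeholders for illustration, not part of the commit.

# Standalone sketch of the thresholded Toxic BERT check introduced in this commit.
# Assumptions (not from the commit): the model checkpoint name is a placeholder,
# and the classify_toxicity() wrapper exists only for this illustration.
from transformers import pipeline

toxic_bert = pipeline("text-classification", model="path/to/toxic-bert-checkpoint")  # placeholder checkpoint

def classify_toxicity(user_text, threshold=0.85):
    result = toxic_bert(user_text)[0]
    # Toxic BERT reports the toxic class as 'LABEL_1'
    classification = "Blocked" if result['label'] == 'LABEL_1' else "Allowed"
    severity = result['score']
    # New in this commit: override an "Allowed" verdict when the score exceeds the threshold
    if classification == "Allowed" and severity > threshold:
        classification = "Blocked"
    return classification, severity

print(classify_toxicity("example input"))

The 0.85 threshold mirrors the value hard-coded in the diff and can be tuned per deployment.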