lg3394 committed on
Commit df7192b · verified · 1 Parent(s): 4156fc3

Update app.py

Browse files
Files changed (1)
  1. app.py +70 -32
app.py CHANGED
@@ -7,8 +7,10 @@ from azure.ai.contentsafety.models import TextCategory
from azure.core.credentials import AzureKeyCredential
from azure.core.exceptions import HttpResponseError
from azure.ai.contentsafety.models import AnalyzeTextOptions
+ import matplotlib.pyplot as plt
+ from transformers import pipeline

- # Load OpenAI and Anthropic API Keys from environment variables
+ # Load OpenAI, Anthropic API Keys from environment variables
openai.api_key = os.getenv("openaiapikey")
anthropic_api_key = os.getenv("anthropickey")

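Note: this hunk pulls in two new dependencies, matplotlib and transformers (which in turn needs a backend such as torch). If the Space installs packages from a requirements.txt, that file would need entries along these lines; this is only a sketch, and the actual file contents and any version pins are assumptions, not taken from this repo:

    # requirements.txt (illustrative sketch, not taken from this repo)
    gradio
    openai
    anthropic
    azure-ai-contentsafety
    matplotlib
    transformers
    torch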
 
@@ -17,6 +19,9 @@ client = Anthropic(api_key=anthropic_api_key)

MODEL_NAME = "claude-3-haiku-20240307"

+ # Load Toxic BERT model from Hugging Face
+ toxic_bert = pipeline("text-classification", model="unitary/toxic-bert")
+
# Function for Azure Content Safety analysis
def analyze_text_azure(user_text):
    # Retrieve Azure keys from Hugging Face secrets (as environment variables)
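Note on the Toxic BERT pipeline: as far as I can tell from the model card, unitary/toxic-bert uses label names such as "toxic", "insult", and "identity_hate" rather than generic "LABEL_1" names, so the toxic_result[0]['label'] == 'LABEL_1' check added later in this diff may never match. A minimal sketch of flagging on the "toxic" label and its score instead (the label name and the 0.5 threshold are assumptions to verify against the model card):

    # Sketch: the default pipeline call returns the single highest-scoring label,
    # e.g. [{"label": "toxic", "score": 0.98}] for toxic input (label names per the
    # model card, not "LABEL_1"). Flag on the name plus a score threshold.
    result = toxic_bert(user_text)[0]
    toxic_classification = "Blocked" if result["label"] == "toxic" and result["score"] > 0.5 else "Allowed"  # 0.5 threshold is an assumption
    toxic_severity = result["score"]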
@@ -36,40 +41,32 @@ def analyze_text_azure(user_text):
        return f"Error occurred with Azure Content Safety: {e}"

    # Extract moderation results
-     hate_result = next((item for item in response.categories_analysis if item.category == TextCategory.HATE), None)
-     self_harm_result = next((item for item in response.categories_analysis if item.category == TextCategory.SELF_HARM), None)
-     sexual_result = next((item for item in response.categories_analysis if item.category == TextCategory.SEXUAL), None)
-     violence_result = next((item for item in response.categories_analysis if item.category == TextCategory.VIOLENCE), None)
-
-     results = []
-     if hate_result:
-         results.append(f"Hate severity: {hate_result.severity}")
-     if self_harm_result:
-         results.append(f"SelfHarm severity: {self_harm_result.severity}")
-     if sexual_result:
-         results.append(f"Sexual severity: {sexual_result.severity}")
-     if violence_result:
-         results.append(f"Violence severity: {violence_result.severity}")
-
-     return "\n".join(results) if results else "No flagged content detected in Azure Content Safety."
+     results = {}
+     for category in [TextCategory.HATE, TextCategory.SELF_HARM, TextCategory.SEXUAL, TextCategory.VIOLENCE]:
+         result = next((item for item in response.categories_analysis if item.category == category), None)
+         if result:
+             results[category] = result.severity
+
+     return results if results else {"No flagged content detected": 0}

def moderate_text(user_text):
    # OpenAI Moderation
-     openai_moderation_result = "Error in OpenAI Moderation"
+     openai_result = {"Classification": "Error in OpenAI Moderation", "Severity": 0}
    try:
        response = openai.Moderation.create(input=user_text)
        moderation_categories = response["results"][0]["categories"]
        moderation_flagged = response["results"][0]["flagged"]

        if moderation_flagged:
-             openai_moderation_result = f"OpenAI flags the text for the following categories: {', '.join([category for category, flagged in moderation_categories.items() if flagged])}"
+             openai_result["Classification"] = "Blocked"
+             openai_result["Severity"] = max([category for category, flagged in moderation_categories.items() if flagged], key=moderation_categories.get)
        else:
-             openai_moderation_result = "The text is not flagged for any moderation issues by OpenAI."
+             openai_result["Classification"] = "Allowed"
    except Exception as e:
-         openai_moderation_result = f"Error occurred with OpenAI: {e}"
+         openai_result = {"Classification": f"Error occurred with OpenAI: {e}", "Severity": 0}

    # Anthropic Moderation
-     anthropic_moderation_result = "Error in Anthropic Moderation"
+     anthropic_result = {"Classification": "Error in Anthropic Moderation", "Severity": 0}
    try:
        prompt_template = """
        You are a content moderation expert tasked with categorizing user-generated text.
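Review note on the OpenAI hunk above: max([...], key=moderation_categories.get) returns a category name (a string), but the comparison chart added later in this diff plots "Severity" on a 0-1 numeric axis. A minimal alternative sketch, assuming the classic openai.Moderation.create response also exposes numeric category_scores as documented for that API:

    # Sketch: take the numeric category_scores from the same moderation response
    # so "Severity" stays a number in [0, 1] for the comparison chart.
    category_scores = response["results"][0]["category_scores"]
    openai_result["Severity"] = max(category_scores.values()) if moderation_flagged else 0.0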
@@ -88,25 +85,66 @@ def moderate_text(user_text):
            messages=[{"role": "user", "content": prompt}]
        ).content[0].text

-         anthropic_moderation_result = f"Anthropic's moderation result: {response}"
-
+         if "blocked" in response.lower():
+             anthropic_result["Classification"] = "Blocked"
+             anthropic_result["Severity"] = 1  # Assigning severity for blocked content
+         else:
+             anthropic_result["Classification"] = "Allowed"
+             anthropic_result["Severity"] = 0
    except Exception as e:
-         anthropic_moderation_result = f"Error occurred with Anthropic: {e}"
+         anthropic_result = {"Classification": f"Error occurred with Anthropic: {e}", "Severity": 0}

    # Azure Content Safety Moderation
-     azure_moderation_result = analyze_text_azure(user_text)
-
-     return openai_moderation_result, anthropic_moderation_result, azure_moderation_result
-
+     azure_result = analyze_text_azure(user_text)
+
+     # Toxic BERT Moderation (Hugging Face Model)
+     toxic_result = toxic_bert(user_text)
+     toxic_classification = "Blocked" if toxic_result[0]['label'] == 'LABEL_1' else "Allowed"  # Toxic BERT classifies as "LABEL_1" for toxic
+     toxic_severity = toxic_result[0]['score']
+
+     # Combine results and generate bar chart
+     categories = ["OpenAI", "Anthropic", "Microsoft Azure", "Toxic BERT"]
+     classifications = [openai_result["Severity"], anthropic_result["Severity"], sum(azure_result.values()) / len(azure_result) if azure_result else 0, toxic_severity]
+
+     bar_chart = create_comparison_chart(categories, classifications)
+
+     # Safe text suggestion for blocked content
+     suggestions = ""
+     if openai_result["Classification"] == "Blocked":
+         suggestions += "OpenAI flagged the text for harmful content. Suggested Rephrase: 'Please use more respectful language.'\n"
+     if anthropic_result["Classification"] == "Blocked":
+         suggestions += "Anthropic flagged the text. Suggested Rephrase: 'Avoid harmful or offensive language.'\n"
+     if any(value > 0.5 for value in azure_result.values()):
+         suggestions += "Azure flagged some content. Suggested Rephrase: 'Try to avoid sensitive topics and ensure respectful language.'\n"
+     if toxic_classification == "Blocked":
+         suggestions += "Toxic BERT flagged the text. Suggested Rephrase: 'Please ensure your language is respectful and non-toxic.'"
+
+     return openai_result, anthropic_result, azure_result, toxic_result, bar_chart, suggestions
+
+ def create_comparison_chart(categories, values):
+     fig, ax = plt.subplots()
+     ax.bar(categories, values, color=['red', 'orange', 'green', 'blue'])
+     ax.set_title("Content Moderation Comparison")
+     ax.set_ylabel("Severity Score")
+     ax.set_ylim(0, 1)
+     ax.set_xlabel("Moderation Tool")
+     return fig

# Create the Gradio interface with updated input and output labels
iface = gr.Interface(
    fn=moderate_text,
    inputs=gr.Textbox(lines=2, placeholder="Please write your text here..."),
-     outputs=[gr.Textbox(label="OpenAI"), gr.Textbox(label="Anthropic"), gr.Textbox(label="Microsoft Azure")],
+     outputs=[
+         gr.Textbox(label="OpenAI"),
+         gr.Textbox(label="Anthropic"),
+         gr.Textbox(label="Microsoft Azure"),
+         gr.Textbox(label="Toxic BERT"),
+         gr.Plot(label="Comparison Bar Chart"),
+         gr.Textbox(label="Safe Text Suggestions")
+     ],
    title="Content Moderation Tool",
-     description="Enter some text and get the moderation results from OpenAI, Anthropic, and Azure Content Safety."
+     description="Enter some text and get the moderation results from OpenAI, Anthropic, Azure Content Safety, Toxic BERT, and suggestions for safe rephrasing."
)

if __name__ == "__main__":
-     iface.launch()
+     iface.launch()
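Review note on the chart in the hunk above: Azure Content Safety reports text severities on a 0-7 scale (0/2/4/6 with the default four-level output, per the Content Safety docs), while the y-axis is fixed to 0-1, so the averaged Azure value can exceed the axis and get clipped. A minimal sketch of normalizing it first; the divide-by-7 assumes that 0-7 scale, and the isinstance filter assumes azure_result maps categories to numeric severities as returned by analyze_text_azure:

    # Sketch: normalize Azure severities to [0, 1] before plotting them next to
    # the other tools' scores.
    azure_values = [v for v in azure_result.values() if isinstance(v, (int, float))]
    azure_score = (sum(azure_values) / len(azure_values)) / 7 if azure_values else 0.0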