"""Gradio app that compares content-moderation verdicts from four providers.

For a single piece of user text the app queries OpenAI Moderation,
Anthropic Claude, Azure AI Content Safety and the Hugging Face
``unitary/toxic-bert`` model, and shows each provider's result side by side.

Credentials are read from the environment variables ``openaiapikey``,
``anthropickey``, ``azurekey`` and ``azureendpoint``.
"""

import os

import gradio as gr
import openai
from anthropic import Anthropic
from azure.ai.contentsafety import ContentSafetyClient
from azure.ai.contentsafety.models import AnalyzeTextOptions, TextCategory
from azure.core.credentials import AzureKeyCredential
from azure.core.exceptions import HttpResponseError
from transformers import pipeline  # Hugging Face pipeline for Toxic BERT

# Load OpenAI and Anthropic API keys from environment variables.
openai.api_key = os.getenv("openaiapikey")
anthropic_api_key = os.getenv("anthropickey")

# Initialize Anthropic client.
client = Anthropic(api_key=anthropic_api_key)
MODEL_NAME = "claude-3-haiku-20240307"

# Load Toxic BERT model from Hugging Face.
toxic_bert = pipeline("text-classification", model="unitary/toxic-bert")

# Score above which the Toxic BERT top prediction is reported as "Blocked".
TOXIC_BLOCK_THRESHOLD = 0.85

# (display label, Azure category) pairs, in the order they are reported.
_AZURE_CATEGORIES = (
    ("Hate", TextCategory.HATE),
    ("SelfHarm", TextCategory.SELF_HARM),
    ("Sexual", TextCategory.SEXUAL),
    ("Violence", TextCategory.VIOLENCE),
)

_ANTHROPIC_PROMPT_TEMPLATE = """
You are a content moderation expert tasked with categorizing user-generated text.
Here is the user-generated text to categorize:
{user_text}
Based on the content, classify this text as either ALLOW or BLOCK. Return nothing else.
"""


def analyze_text_azure(user_text):
    """Analyze *user_text* with Azure AI Content Safety.

    Args:
        user_text: The text to analyze.

    Returns:
        A newline-separated string with one "<Category> severity: <n>" line
        per category present in the response, a fixed "nothing flagged"
        message when no category is present, or an error message when the
        credentials are missing or the service call fails.
    """
    # Retrieve Azure keys from Hugging Face secrets (as environment variables).
    key = os.getenv("azurekey")
    endpoint = os.getenv("azureendpoint")
    if not key or not endpoint:
        # Report misconfiguration as a result string, like a service error,
        # so the other providers' results are still shown.
        return (
            "Error occurred with Azure Content Safety: "
            "missing 'azurekey' or 'azureendpoint' environment variable"
        )

    # Named azure_client so it does not shadow the module-level Anthropic client.
    azure_client = ContentSafetyClient(endpoint, AzureKeyCredential(key))
    request = AnalyzeTextOptions(text=user_text)

    try:
        response = azure_client.analyze_text(request)
    except HttpResponseError as e:
        return f"Error occurred with Azure Content Safety: {e}"

    # Extract moderation results, one line per category found in the response.
    results = []
    for label, category in _AZURE_CATEGORIES:
        # Compare with == rather than a dict lookup, mirroring the original
        # per-category scan of categories_analysis.
        match = next(
            (item for item in response.categories_analysis if item.category == category),
            None,
        )
        if match:
            results.append(f"{label} severity: {match.severity}")

    return "\n".join(results) if results else "No flagged content detected in Azure Content Safety."


def _moderate_openai(user_text):
    """Return a human-readable OpenAI Moderation verdict for *user_text*."""
    try:
        response = openai.Moderation.create(input=user_text)
        result = response["results"][0]
        if result["flagged"]:
            flagged_names = ", ".join(
                category for category, flagged in result["categories"].items() if flagged
            )
            return f"OpenAI flags the text for the following categories: {flagged_names}"
        return "The text is not flagged for any moderation issues by OpenAI."
    except Exception as e:  # UI boundary: report the failure instead of crashing
        return f"Error occurred with OpenAI: {e}"


def _moderate_anthropic(user_text):
    """Ask Claude to classify *user_text* and return its reply as a message."""
    try:
        # Format the prompt with the user text.
        prompt = _ANTHROPIC_PROMPT_TEMPLATE.format(user_text=user_text)
        # Send the prompt to Claude and take the text of the first content part.
        reply = client.messages.create(
            model=MODEL_NAME,
            max_tokens=10,
            messages=[{"role": "user", "content": prompt}],
        ).content[0].text
        return f"Anthropic's moderation result: {reply}"
    except Exception as e:  # UI boundary: report the failure instead of crashing
        return f"Error occurred with Anthropic: {e}"


def _moderate_toxic_bert(user_text):
    """Classify *user_text* with Toxic BERT and return a summary message.

    The text is reported as "Blocked" when the top prediction's label is
    'LABEL_1' or its score exceeds TOXIC_BLOCK_THRESHOLD, otherwise "Allowed".
    """
    try:
        # truncation=True keeps over-long inputs within the model's maximum
        # sequence length instead of raising.
        top = toxic_bert(user_text, truncation=True)[0]
    except Exception as e:  # UI boundary: keep the other providers' results
        return f"Error occurred with Toxic BERT: {e}"

    score = top["score"]
    # NOTE(review): the 'LABEL_1' comparison is kept from the original; if the
    # model emits named labels (e.g. 'toxic') it never matches and only the
    # threshold decides -- confirm against the model's label set.
    blocked = top["label"] == "LABEL_1" or score > TOXIC_BLOCK_THRESHOLD
    classification = "Blocked" if blocked else "Allowed"
    return f"Toxic BERT classification: {classification}, Confidence: {score:.2f}"


def moderate_text(user_text):
    """Run *user_text* through all four moderation providers.

    Args:
        user_text: The text entered in the Gradio textbox.

    Returns:
        A 4-tuple of result strings in the order
        (OpenAI, Anthropic, Azure Content Safety, Toxic BERT). A provider
        failure handled by its helper is reported inside that provider's
        string.
    """
    openai_moderation_result = _moderate_openai(user_text)
    anthropic_moderation_result = _moderate_anthropic(user_text)
    azure_moderation_result = analyze_text_azure(user_text)
    toxic_explanation = _moderate_toxic_bert(user_text)
    return (
        openai_moderation_result,
        anthropic_moderation_result,
        azure_moderation_result,
        toxic_explanation,
    )


# Create the Gradio interface with updated input and output labels.
iface = gr.Interface(
    fn=moderate_text,
    inputs=gr.Textbox(lines=2, placeholder="Please write your text here..."),
    outputs=[
        gr.Textbox(label="OpenAI"),
        gr.Textbox(label="Anthropic"),
        gr.Textbox(label="Microsoft Azure"),
        gr.Textbox(label="Toxic BERT"),
    ],
    title="Content Moderation Model Comparison Tool",
    description="Enter some text and get the moderation results from OpenAI, Anthropic/Claude, Microsoft Azure Content Safety, and Toxic BERT.",
)

if __name__ == "__main__":
    iface.launch()