# Hugging Face Spaces — status: Sleeping
import gradio as gr | |
import openai | |
import os | |
from anthropic import Anthropic | |
from azure.ai.contentsafety import ContentSafetyClient | |
from azure.ai.contentsafety.models import TextCategory | |
from azure.core.credentials import AzureKeyCredential | |
from azure.core.exceptions import HttpResponseError | |
from azure.ai.contentsafety.models import AnalyzeTextOptions | |
from transformers import pipeline # Importing Hugging Face pipeline for Toxic BERT | |
# API credentials are read from environment variables; a missing variable yields None.
openai.api_key = os.environ.get("openaiapikey")
anthropic_api_key = os.environ.get("anthropickey")

# Shared Anthropic client and the Claude model used for ALLOW/BLOCK classification.
client = Anthropic(api_key=anthropic_api_key)
MODEL_NAME = "claude-3-haiku-20240307"

# Toxic BERT text-classification pipeline, built once at import time and reused per request.
toxic_bert = pipeline("text-classification", model="unitary/toxic-bert")
# Function for Azure Content Safety analysis
def analyze_text_azure(user_text):
    """Analyze ``user_text`` with Azure Content Safety and summarize severities.

    The service key and endpoint are read from the ``azurekey`` and
    ``azureendpoint`` environment variables.

    Args:
        user_text: The text to send to the Content Safety service.

    Returns:
        str: One ``"<Category> severity: <n>"`` line (newline-joined) for each
        of the Hate, SelfHarm, Sexual and Violence categories present in the
        response; a fallback message when none of them is present; or an
        ``"Error occurred with Azure Content Safety: ..."`` message when the
        credentials are missing or the service returns an HTTP error.
    """
    key = os.getenv("azurekey")
    endpoint = os.getenv("azureendpoint")
    # Fail with the same kind of readable message as an HTTP error instead of
    # letting the SDK constructors raise on a None key/endpoint.
    if not key or not endpoint:
        return (
            "Error occurred with Azure Content Safety: "
            "missing 'azurekey' or 'azureendpoint' environment variable"
        )

    # Deliberately not called `client`, which is the module-level Anthropic client.
    safety_client = ContentSafetyClient(endpoint, AzureKeyCredential(key))
    request = AnalyzeTextOptions(text=user_text)

    try:
        response = safety_client.analyze_text(request)
    except HttpResponseError as e:
        return f"Error occurred with Azure Content Safety: {e}"

    # Output label paired with the category it reports, in display order.
    labelled_categories = (
        ("Hate", TextCategory.HATE),
        ("SelfHarm", TextCategory.SELF_HARM),
        ("Sexual", TextCategory.SEXUAL),
        ("Violence", TextCategory.VIOLENCE),
    )

    results = []
    for label, category in labelled_categories:
        # Match by equality (not dict lookup) and take the first entry for the
        # category, exactly as a linear scan over the response would.
        match = next(
            (item for item in response.categories_analysis if item.category == category),
            None,
        )
        if match:
            results.append(f"{label} severity: {match.severity}")

    return "\n".join(results) if results else "No flagged content detected in Azure Content Safety."
def _openai_verdict(user_text):
    """Return a one-line summary of OpenAI's moderation result for the text."""
    try:
        result = openai.Moderation.create(input=user_text)["results"][0]
        if not result["flagged"]:
            return "The text is not flagged for any moderation issues by OpenAI."
        flagged = [name for name, hit in result["categories"].items() if hit]
        return f"OpenAI flags the text for the following categories: {', '.join(flagged)}"
    except Exception as e:
        return f"Error occurred with OpenAI: {e}"


def _anthropic_verdict(user_text):
    """Ask Claude to classify the text as ALLOW or BLOCK and return its answer."""
    prompt = (
        "\n"
        "You are a content moderation expert tasked with categorizing user-generated text.\n"
        "Here is the user-generated text to categorize:\n"
        f"<user_text>{user_text}</user_text>\n"
        "Based on the content, classify this text as either ALLOW or BLOCK. Return nothing else.\n"
    )
    try:
        reply = client.messages.create(
            model=MODEL_NAME,
            max_tokens=10,
            messages=[{"role": "user", "content": prompt}],
        )
        return f"Anthropic's moderation result: {reply.content[0].text}"
    except Exception as e:
        return f"Error occurred with Anthropic: {e}"


def _azure_verdict(user_text):
    """Run the Azure Content Safety check, converting any failure into a message."""
    try:
        return analyze_text_azure(user_text)
    except Exception as e:
        return f"Error occurred with Azure Content Safety: {e}"


def _toxic_bert_verdict(user_text, threshold=0.85):
    """Classify the text with Toxic BERT; block on the top score exceeding ``threshold``."""
    try:
        top = toxic_bert(user_text)[0]
        score = top["score"]
        # NOTE(review): the 'LABEL_1' check only fires if the model reports
        # generic label names — verify against the labels unitary/toxic-bert
        # actually emits. The score threshold applies either way.
        blocked = top["label"] == "LABEL_1" or score > threshold
        verdict = "Blocked" if blocked else "Allowed"
        return f"Toxic BERT classification: {verdict}, Confidence: {score:.2f}"
    except Exception as e:
        return f"Error occurred with Toxic BERT: {e}"


def moderate_text(user_text):
    """Run ``user_text`` through all four moderation backends.

    Each backend is queried independently; a failure in one is reported as an
    "Error occurred with ..." string in its slot and never prevents the other
    results from being returned.

    Args:
        user_text: The text to moderate.

    Returns:
        tuple[str, str, str, str]: Human-readable results from OpenAI,
        Anthropic, Azure Content Safety and Toxic BERT, in that order.
    """
    return (
        _openai_verdict(user_text),
        _anthropic_verdict(user_text),
        _azure_verdict(user_text),
        _toxic_bert_verdict(user_text),
    )
# Gradio UI: a single text input feeding moderate_text, with one labelled
# output box per moderation backend (same order as moderate_text's return tuple).
iface = gr.Interface(
    fn=moderate_text,
    inputs=gr.Textbox(lines=2, placeholder="Please write your text here..."),
    outputs=[
        gr.Textbox(label=backend)
        for backend in ("OpenAI", "Anthropic", "Microsoft Azure", "Toxic BERT")
    ],
    title="Content Moderation Model Comparison Tool",
    description="Enter some text and get the moderation results from OpenAI, Anthropic/Claude, Microsoft Azure Content Safety, and Toxic BERT.",
)

# Start the web app only when executed as a script, not on import.
if __name__ == "__main__":
    iface.launch()