Spaces:

lg3394
/

aimoderationproject

Running

App Files Files Community

aimoderationproject / app.py

lg3394

Update app.py

4b1da5c verified 2 months ago

raw

history blame contribute delete

6.67 kB

	import gradio as gr
	import os
	from openai import OpenAI
	from anthropic import Anthropic
	from azure.ai.contentsafety import ContentSafetyClient
	from azure.ai.contentsafety.models import TextCategory
	from azure.core.credentials import AzureKeyCredential
	from azure.core.exceptions import HttpResponseError
	from azure.ai.contentsafety.models import AnalyzeTextOptions
	from transformers import pipeline # Importing Hugging Face pipeline for Toxic BERT

	# --- OpenAI -----------------------------------------------------------------
	# The key may be stored under either the conventional name or the lowercase
	# secret name; the first non-empty value wins.
	api_key = os.getenv("OPENAI_API_KEY") or os.getenv("openaiapikey")
	if not api_key:
	    print("WARNING: No OpenAI API key found in environment variables!")
	    # A placeholder lets the client object be created anyway; requests made
	    # with it are expected to fail and are handled where the client is used.
	    api_key = "placeholder_key_for_initialization" # This will cause a controlled error

	# Initialize OpenAI client
	openai_client = OpenAI(api_key=api_key)

	# --- Anthropic --------------------------------------------------------------
	anthropic_api_key = os.getenv("anthropickey")
	if not anthropic_api_key:
	    # Mirror the OpenAI warning so a missing secret is visible in the logs
	    # at start-up instead of only surfacing on the first request.
	    print('WARNING: The "anthropickey" environment variable is not set!')

	# Initialize Anthropic client
	client = Anthropic(api_key=anthropic_api_key)

	# Claude model used for the ALLOW/BLOCK classification prompt.
	MODEL_NAME = "claude-3-haiku-20240307"

	# Load Toxic BERT model from Hugging Face (done once, at import time)
	toxic_bert = pipeline("text-classification", model="unitary/toxic-bert")

	# Function for Azure Content Safety analysis
	def analyze_text_azure(user_text):
	    """Analyse text with Azure AI Content Safety and summarise the severities.

	    Args:
	        user_text: The text to analyse.

	    Returns:
	        A newline-separated string with one "<Category> severity: <n>" line
	        for each of the Hate, SelfHarm, Sexual and Violence categories found
	        in the response; a "No flagged content..." message when none of them
	        is present; or an "Error occurred with Azure Content Safety: ..."
	        message when credentials are missing or the service call raises
	        HttpResponseError.
	    """
	    # Retrieve Azure keys from Hugging Face secrets (as environment variables)
	    key = os.getenv("azurekey")
	    endpoint = os.getenv("azureendpoint")
	    if not key or not endpoint:
	        # Fail with the same kind of message as a service error instead of
	        # letting the client constructor raise on a None credential.
	        return (
	            "Error occurred with Azure Content Safety: "
	            "missing 'azurekey' or 'azureendpoint' environment variable."
	        )

	    # Create Content Safety client
	    client = ContentSafetyClient(endpoint, AzureKeyCredential(key))

	    # Construct request
	    request = AnalyzeTextOptions(text=user_text)

	    # Analyze text
	    try:
	        response = client.analyze_text(request)
	    except HttpResponseError as e:
	        return f"Error occurred with Azure Content Safety: {e}"

	    # Extract moderation results, keeping a fixed output order.
	    labelled_categories = (
	        ("Hate", TextCategory.HATE),
	        ("SelfHarm", TextCategory.SELF_HARM),
	        ("Sexual", TextCategory.SEXUAL),
	        ("Violence", TextCategory.VIOLENCE),
	    )
	    results = []
	    for label, category in labelled_categories:
	        # First entry whose category compares equal wins.
	        match = next(
	            (item for item in response.categories_analysis if item.category == category),
	            None,
	        )
	        if match:
	            results.append(f"{label} severity: {match.severity}")

	    return "\n".join(results) if results else "No flagged content detected in Azure Content Safety."

	def _moderate_openai(user_text):
	    """Return the OpenAI Moderation verdict for the text, or a fallback notice on any error."""
	    try:
	        # Attempt to use the real API
	        response = openai_client.moderations.create(input=user_text)
	        first_result = response.results[0]

	        if not first_result.flagged:
	            return "The text is not flagged for any moderation issues by OpenAI."

	        # Public attributes of the categories object, name -> flagged value.
	        categories_dict = {k: v for k, v in vars(first_result.categories).items() if not k.startswith('_')}
	        flagged_names = [category for category, flagged in categories_dict.items() if flagged]
	        return f"OpenAI flags the text for the following categories: {', '.join(flagged_names)}"
	    except Exception as e:
	        # Log the real cause, then show a fallback response describing the
	        # likely API connectivity issues.
	        print(f"Debug - OpenAI Error: {str(e)}")
	        return """
	⚠️ OpenAI API Connection Error ⚠️

	The OpenAI Moderation API is currently unavailable. This may be due to:
	- Network connectivity issues in the hosting environment
	- API rate limits
	- API key configuration

	Example output (if connected):
	- For harmful content: "OpenAI flags the text for the following categories: violence, hate, self-harm"
	- For safe content: "The text is not flagged for any moderation issues by OpenAI."

	[This is a demonstration of a multi-model content moderation system that compares results across different providers]
	"""


	def _moderate_anthropic(user_text):
	    """Ask Claude to classify the text as ALLOW or BLOCK; return the reply or an error message."""
	    try:
	        prompt_template = """
	You are a content moderation expert tasked with categorizing user-generated text.
	Here is the user-generated text to categorize:
	<user_text>{user_text}</user_text>
	Based on the content, classify this text as either ALLOW or BLOCK. Return nothing else.
	"""

	        # Format the prompt with the user text
	        prompt = prompt_template.format(user_text=user_text)

	        # Send the prompt to Claude and get the response
	        response = client.messages.create(
	            model=MODEL_NAME,
	            max_tokens=10,
	            messages=[{"role": "user", "content": prompt}]
	        ).content[0].text

	        return f"Anthropic's moderation result: {response}"
	    except Exception as e:
	        return f"Error occurred with Anthropic: {e}"


	def _moderate_toxic_bert(user_text, threshold=0.85):
	    """Classify the text with the Toxic BERT pipeline; return a summary line or an error message."""
	    try:
	        top_prediction = toxic_bert(user_text)[0]
	        toxic_severity = top_prediction['score']
	        # NOTE(review): 'LABEL_1' is kept from the original check; confirm which
	        # label names unitary/toxic-bert actually emits. Whatever the label, a
	        # score above the threshold also blocks the text.
	        is_blocked = top_prediction['label'] == 'LABEL_1' or toxic_severity > threshold
	    except Exception as e:
	        # Without this guard a model failure would abort the whole comparison,
	        # unlike the other providers, which report errors as text.
	        return f"Error occurred with Toxic BERT: {e}"

	    toxic_classification = "Blocked" if is_blocked else "Allowed"
	    return f"Toxic BERT classification: {toxic_classification}, Confidence: {toxic_severity:.2f}"


	def moderate_text(user_text):
	    """Run the text through all four moderation providers.

	    Each provider is queried independently and a failure in one is reported
	    as a message in that provider's slot rather than raised.

	    Args:
	        user_text: The text to moderate.

	    Returns:
	        A 4-tuple of strings, in this order: OpenAI result, Anthropic result,
	        Azure Content Safety result, Toxic BERT result.
	    """
	    # OpenAI Moderation - WITH GRACEFUL ERROR HANDLING
	    openai_moderation_result = _moderate_openai(user_text)

	    # Anthropic Moderation
	    anthropic_moderation_result = _moderate_anthropic(user_text)

	    # Azure Content Safety Moderation
	    try:
	        azure_moderation_result = analyze_text_azure(user_text)
	    except Exception as e:
	        # Covers failures the Azure helper does not convert to a message itself.
	        azure_moderation_result = f"Error occurred with Azure Content Safety: {e}"

	    # Toxic BERT Moderation (Hugging Face Model)
	    toxic_explanation = _moderate_toxic_bert(user_text)

	    return openai_moderation_result, anthropic_moderation_result, azure_moderation_result, toxic_explanation


	# Gradio UI: a two-line text input feeding moderate_text, and one labelled
	# output textbox per provider, in the order moderate_text returns its results.
	iface = gr.Interface(
	    fn=moderate_text,
	    inputs=gr.Textbox(lines=2, placeholder="Please write your text here..."),
	    outputs=[
	        gr.Textbox(label=provider_label)
	        for provider_label in ("OpenAI", "Anthropic", "Microsoft Azure", "Toxic BERT")
	    ],
	    title="Content Moderation Model Comparison Tool",
	    description=(
	        "Enter some text and get the moderation results from OpenAI, Anthropic/Claude, "
	        "Microsoft Azure Content Safety, and Toxic BERT. "
	        "Note: The OpenAI API connection may be unavailable in this demo."
	    ),
	)

	# Start the web app only when the file is executed as a script.
	if __name__ == "__main__":
	    iface.launch()