import os
import gradio as gr
from PIL import Image, ImageEnhance
import cv2
from transformers import CLIPProcessor, CLIPModel
import torch
from openai import OpenAI

# Set up the OpenAI client (openai>=1.0 SDK style; expects OPENAI_API_KEY to be set)
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
# Load the CLIP model for object identification
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch16")
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch16")

# Predefined object labels for CLIP
object_labels = [
    "cat", "dog", "house", "tree", "car", "mountain", "flower", "bird", "person", "robot",
    "a digital artwork", "a portrait", "a landscape", "a futuristic cityscape",
]
def analyze_contrast_opencv(image_path):
    """Analyze the contrast of the uploaded image using OpenCV."""
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if img is None:  # cv2.imread returns None silently on unreadable paths
        raise ValueError(f"Could not read image: {image_path}")
    contrast = img.std()  # Standard deviation of pixel intensities as a contrast proxy
    return contrast
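
# Rough intuition for the std-based heuristic above (a sketch, not a calibrated
# metric): a uniform image has zero std, while a 0/255 checkerboard sits near
# the ~127.5 maximum. For example (assuming numpy imported as np):
#   np.full((64, 64), 128, np.uint8).std()        -> 0.0    (flat, no contrast)
#   (np.indices((64, 64)).sum(0) % 2 * 255).std() -> ~127.5 (maximal contrast)
# The threshold of 25 used in diane_multimodal below is a tunable, empirical choice.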
def identify_objects_with_clip(image_path):
    """Identify objects in the image using CLIP."""
    image = Image.open(image_path).convert("RGB")
    inputs = clip_processor(text=object_labels, images=image, return_tensors="pt", padding=True)
    with torch.no_grad():
        outputs = clip_model(**inputs)
    logits_per_image = outputs.logits_per_image
    probs = logits_per_image.softmax(dim=1).numpy().flatten()
    # Return the label with the highest probability
    best_match_idx = probs.argmax()
    best_match_label = object_labels[best_match_idx]
    return best_match_label
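
# Note: only the argmax label is returned. A top-k sketch, should richer feedback
# ever be needed: [object_labels[i] for i in probs.argsort()[-3:][::-1]] yields
# the three most probable labels in descending order.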
def enhance_contrast(image):
    """Enhance the contrast of the image."""
    enhancer = ImageEnhance.Contrast(image)
    enhanced_image = enhancer.enhance(2.0)  # Factor > 1.0 boosts contrast; 1.0 returns the original
    enhanced_path = "enhanced_image.png"
    enhanced_image.save(enhanced_path)
    return enhanced_path
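
# PIL's ImageEnhance.Contrast factor is a blend weight: 0.0 produces a solid
# gray image, 1.0 returns the original, and values above 1.0 extrapolate toward
# higher contrast. The 2.0 above is an aggressive default; tune as needed.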
def suggest_improvements_with_chatgpt(object_name):
    """Generate improvement suggestions for the identified object using ChatGPT."""
    prompt = f"Suggest ways to improve a digital artwork featuring a {object_name}."
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are a helpful assistant providing expert art improvement suggestions."},
            {"role": "user", "content": prompt},
        ],
    )
    return response.choices[0].message.content
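
# A defensive variant (sketch, optional): wrap the API call in a
# try/except OpenAIError (the SDK's base exception, importable from openai) and
# return a canned apology so a network or quota failure never surfaces as a raw
# traceback in the chat window.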
def diane_multimodal(message, history):
    """
    Process both text and image input:
    - Respond to text prompts about digital art.
    - Analyze image contrast, identify objects, and provide feedback.
    - Enhance contrast or suggest improvements when requested.
    """
    response = ""
    num_images = len(message["files"])

    if num_images > 0:
        # Handle uploaded images
        last_image_path = message["files"][-1]
        contrast = analyze_contrast_opencv(last_image_path)
        identified_object = identify_objects_with_clip(last_image_path)

        if contrast < 25:  # Empirical threshold; adjust as needed
            response = (
                f"Hey, great drawing of a {identified_object}! However, it looks like the contrast is too low. "
                "Would you like me to improve it?"
            )
            # Save the analyzed image path to the history
            history.append({"role": "assistant", "content": (last_image_path,)})
        else:
            response = (
                f"Hey, great drawing of a {identified_object}! Looks like the color contrast is great, be proud of yourself :)"
            )
        return {"role": "assistant", "content": response}

    elif message["text"]:
        # Handle text-based inputs
        user_text = message["text"].lower()

        if any(keyword in user_text for keyword in ["improve", "yes", "better"]):
            # Check whether an image was previously uploaded
            for entry in reversed(history):
                if isinstance(entry["content"], tuple):  # Image paths are stored as tuples
                    last_image_path = entry["content"][0]
                    enhanced_image_path = enhance_contrast(Image.open(last_image_path).convert("RGB"))
                    # Queue the text message first
                    history.append({"role": "assistant", "content": "Here's your improved image! Let me know if you'd like further adjustments."})
                    # Return the image in a separate message
                    return {"role": "assistant", "content": gr.Image(enhanced_image_path)}
            response = "I couldn't find an uploaded image to improve. Please upload one first!"
        elif "suggestions" in user_text:
            for entry in reversed(history):
                if isinstance(entry["content"], tuple):  # Image paths are stored as tuples
                    last_image_path = entry["content"][0]
                    identified_object = identify_objects_with_clip(last_image_path)
                    improvement_suggestions = suggest_improvements_with_chatgpt(identified_object)
                    return {"role": "assistant", "content": improvement_suggestions}
            response = "I couldn't find an uploaded image to analyze. Please upload one first!"
        else:
            response = "Feel free to upload an image or ask for art tips!"

    return {"role": "assistant", "content": response}
# Define the multimodal chatbot interface
demo = gr.ChatInterface(
    fn=diane_multimodal,
    type="messages",
    examples=[
        {"text": "Teach me about digital art tools", "files": []},
        {"text": "What is the best way to do shading digitally?", "files": []},
    ],
    multimodal=True,
    textbox=gr.MultimodalTextbox(file_count="multiple", file_types=["image"]),
    title="Your Digital Art Nice Expert (DIANE)",
    description="Use this chatbot to improve your digital art skills or analyze and enhance image contrast.",
)

demo.launch()