import os
import gradio as gr
from PIL import Image, ImageEnhance
import cv2
from transformers import CLIPProcessor, CLIPModel
import torch
from openai import OpenAI

# Set up the OpenAI client (openai>=1.0 SDK style; expects OPENAI_API_KEY to be set)
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
# Load the CLIP model for object identification
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch16")
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch16")

# Predefined object labels for CLIP
object_labels = [
    "cat", "dog", "house", "tree", "car", "mountain", "flower", "bird", "person", "robot",
    "a digital artwork", "a portrait", "a landscape", "a futuristic cityscape",
]
def analyze_contrast_opencv(image_path):
    """Analyze the contrast of the uploaded image using OpenCV."""
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if img is None:  # cv2.imread returns None silently on unreadable paths
        raise ValueError(f"Could not read image: {image_path}")
    contrast = img.std()  # Standard deviation of pixel intensities as a contrast proxy
    return contrast
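
# Rough intuition for the std-based heuristic above (a sketch, not a calibrated
# metric): a uniform image has zero std, while a 0/255 checkerboard sits near
# the ~127.5 maximum. For example (assuming numpy imported as np):
#   np.full((64, 64), 128, np.uint8).std()        -> 0.0    (flat, no contrast)
#   (np.indices((64, 64)).sum(0) % 2 * 255).std() -> ~127.5 (maximal contrast)
# The threshold of 25 used in diane_multimodal below is a tunable, empirical choice.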
def identify_objects_with_clip(image_path):
    """Identify objects in the image using CLIP."""
    image = Image.open(image_path).convert("RGB")
    inputs = clip_processor(text=object_labels, images=image, return_tensors="pt", padding=True)
    with torch.no_grad():
        outputs = clip_model(**inputs)
    logits_per_image = outputs.logits_per_image
    probs = logits_per_image.softmax(dim=1).numpy().flatten()
    # Return the label with the highest probability
    best_match_idx = probs.argmax()
    best_match_label = object_labels[best_match_idx]
    return best_match_label
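
# Note: only the argmax label is returned. A top-k sketch, should richer feedback
# ever be needed: [object_labels[i] for i in probs.argsort()[-3:][::-1]] yields
# the three most probable labels in descending order.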
def enhance_contrast(image):
    """Enhance the contrast of the image."""
    enhancer = ImageEnhance.Contrast(image)
    enhanced_image = enhancer.enhance(2.0)  # Factor > 1.0 boosts contrast; 1.0 returns the original
    enhanced_path = "enhanced_image.png"
    enhanced_image.save(enhanced_path)
    return enhanced_path
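
# PIL's ImageEnhance.Contrast factor is a blend weight: 0.0 produces a solid
# gray image, 1.0 returns the original, and values above 1.0 extrapolate toward
# higher contrast. The 2.0 above is an aggressive default; tune as needed.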
def suggest_improvements_with_chatgpt(object_name):
    """Generate improvement suggestions for the identified object using ChatGPT."""
    prompt = f"Suggest ways to improve a digital artwork featuring a {object_name}."
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are a helpful assistant providing expert art improvement suggestions."},
            {"role": "user", "content": prompt},
        ],
    )
    return response.choices[0].message.content
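
# A defensive variant (sketch, optional): wrap the API call in a
# try/except OpenAIError (the SDK's base exception, importable from openai) and
# return a canned apology so a network or quota failure never surfaces as a raw
# traceback in the chat window.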
def diane_multimodal(message, history):
    """
    Process both text and image input:
    - Respond to text prompts about digital art.
    - Analyze image contrast, identify objects, and provide feedback.
    - Enhance contrast or suggest improvements when requested.
    """
    response = ""
    num_images = len(message["files"])

    if num_images > 0:
        # Handle uploaded images
        last_image_path = message["files"][-1]
        contrast = analyze_contrast_opencv(last_image_path)
        identified_object = identify_objects_with_clip(last_image_path)

        if contrast < 25:  # Empirical threshold; adjust as needed
            response = (
                f"Hey, great drawing of a {identified_object}! However, it looks like the contrast is too low. "
                "Would you like me to improve it?"
            )
            # Save the analyzed image path to the history
            history.append({"role": "assistant", "content": (last_image_path,)})
        else:
            response = (
                f"Hey, great drawing of a {identified_object}! Looks like the color contrast is great, be proud of yourself :)"
            )
        return {"role": "assistant", "content": response}

    elif message["text"]:
        # Handle text-based inputs
        user_text = message["text"].lower()

        if any(keyword in user_text for keyword in ["improve", "yes", "better"]):
            # Check whether an image was previously uploaded
            for entry in reversed(history):
                if isinstance(entry["content"], tuple):  # Image paths are stored as tuples
                    last_image_path = entry["content"][0]
                    enhanced_image_path = enhance_contrast(Image.open(last_image_path).convert("RGB"))
                    # Queue the text message first
                    history.append({"role": "assistant", "content": "Here's your improved image! Let me know if you'd like further adjustments."})
                    # Return the image in a separate message
                    return {"role": "assistant", "content": gr.Image(enhanced_image_path)}
            response = "I couldn't find an uploaded image to improve. Please upload one first!"
        elif "suggestions" in user_text:
            for entry in reversed(history):
                if isinstance(entry["content"], tuple):  # Image paths are stored as tuples
                    last_image_path = entry["content"][0]
                    identified_object = identify_objects_with_clip(last_image_path)
                    improvement_suggestions = suggest_improvements_with_chatgpt(identified_object)
                    return {"role": "assistant", "content": improvement_suggestions}
            response = "I couldn't find an uploaded image to analyze. Please upload one first!"
        else:
            response = "Feel free to upload an image or ask for art tips!"

    return {"role": "assistant", "content": response}
# Define the multimodal chatbot interface
demo = gr.ChatInterface(
    fn=diane_multimodal,
    type="messages",
    examples=[
        {"text": "Teach me about digital art tools", "files": []},
        {"text": "What is the best way to do shading digitally?", "files": []},
    ],
    multimodal=True,
    textbox=gr.MultimodalTextbox(file_count="multiple", file_types=["image"]),
    title="Your Digital Art Nice Expert (DIANE)",
    description="Use this chatbot to improve your digital art skills or analyze and enhance image contrast.",
)

demo.launch()