import os
import gradio as gr
from PIL import Image, ImageEnhance
import cv2
from transformers import CLIPProcessor, CLIPModel
import torch
import openai
# Set OpenAI API Key
openai.api_key = os.getenv("OPENAI_API_KEY")
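# NOTE: os.getenv returns None when OPENAI_API_KEY is unset; the app will still
# start, but suggest_improvements_with_chatgpt will fail at request time.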
# Load the CLIP model for object identification
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch16")
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch16")
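# NOTE: CLIP is used here as a zero-shot classifier: the uploaded image is scored
# against the candidate labels below, with no task-specific training. The lighter
# "openai/clip-vit-base-patch32" checkpoint is a drop-in alternative if memory is tight.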
# Predefined object labels for CLIP
object_labels = [
    "cat", "dog", "house", "tree", "car", "mountain", "flower", "bird", "person", "robot",
    "a digital artwork", "a portrait", "a landscape", "a futuristic cityscape",
]

def analyze_contrast_opencv(image_path):
    """Analyze the contrast of the uploaded image using OpenCV."""
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    contrast = img.std()  # Standard deviation of pixel intensities
    return contrast
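# A low standard deviation means the pixel intensities cluster together (a flat,
# washed-out look); diane_multimodal below treats values under 25 as low contrast.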

def identify_objects_with_clip(image_path):
    """Identify objects in the image using CLIP."""
    image = Image.open(image_path).convert("RGB")
    inputs = clip_processor(text=object_labels, images=image, return_tensors="pt", padding=True)
    with torch.no_grad():
        outputs = clip_model(**inputs)
        logits_per_image = outputs.logits_per_image
        probs = logits_per_image.softmax(dim=1).numpy().flatten()
    # Get the label with the highest probability
    best_match_idx = probs.argmax()
    best_match_label = object_labels[best_match_idx]
    return best_match_label
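# Caveat: the softmax is computed over the candidate labels only, so the "best
# match" is always one of object_labels, even when none of them truly fits.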

def enhance_contrast(image):
    """Enhance the contrast of the image."""
    enhancer = ImageEnhance.Contrast(image)
    enhanced_image = enhancer.enhance(2.0)  # Increase contrast
    enhanced_path = "enhanced_image.png"
    enhanced_image.save(enhanced_path)
    return enhanced_path
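
# OPTIONAL (sketch, not wired into the app): a more adaptive alternative to the
# fixed 2.0 contrast factor above. CLAHE (Contrast Limited Adaptive Histogram
# Equalization) from OpenCV boosts local contrast and often handles unevenly lit
# drawings better. The name and defaults here are illustrative, not part of the app.
def enhance_contrast_clahe(image_path, output_path="enhanced_image_clahe.png"):
    """Sketch: enhance local contrast by applying CLAHE to the L channel in LAB space."""
    img = cv2.imread(image_path)
    lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
    l_channel, a_channel, b_channel = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    enhanced = cv2.merge((clahe.apply(l_channel), a_channel, b_channel))
    cv2.imwrite(output_path, cv2.cvtColor(enhanced, cv2.COLOR_LAB2BGR))
    return output_path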

def suggest_improvements_with_chatgpt(object_name):
    """Generate improvement suggestions for the identified object using ChatGPT."""
    prompt = f"Suggest ways to improve a digital artwork featuring a {object_name}."
    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are a helpful assistant providing expert art improvement suggestions."},
            {"role": "user", "content": prompt},
        ],
    )
    return response["choices"][0]["message"]["content"]
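
# NOTE (sketch): the call above uses the legacy openai<1.0 SDK. With openai>=1.0
# the equivalent is roughly:
#     from openai import OpenAI
#     client = OpenAI()  # reads OPENAI_API_KEY from the environment
#     response = client.chat.completions.create(model="gpt-4", messages=[...])
#     return response.choices[0].message.content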

def diane_multimodal(message, history):
    """
    Process both text and image input:
    - Respond to text prompts about digital art.
    - Analyze image contrast, identify objects, and provide feedback.
    - Enhance contrast or suggest improvements when requested.

    `message` arrives from the multimodal textbox as {"text": str, "files": [paths]};
    `history` is a list of {"role", "content"} dicts (content is a tuple for files).
    """
    response = ""
    num_images = len(message["files"])
    if num_images > 0:
        # Handle uploaded images
        last_image_path = message["files"][-1]
        contrast = analyze_contrast_opencv(last_image_path)
        identified_object = identify_objects_with_clip(last_image_path)
        if contrast < 25:  # Adjust threshold as needed
            response = (
                f"Hey, great drawing of a {identified_object}! However, it looks like the contrast is too low. "
                "Would you like me to improve it?"
            )
            # Save the analyzed image path to the history
            history.append({"role": "assistant", "content": (last_image_path,)})
        else:
            response = (
                f"Hey, great drawing of a {identified_object}! Looks like the color contrast is great, be proud of yourself :)"
            )
        return {"role": "assistant", "content": response}
    elif message["text"]:
        # Handle text-based inputs
        user_text = message["text"].lower()
        if any(keyword in user_text for keyword in ["improve", "yes", "better"]):
            # Check if an image was previously uploaded
            for entry in reversed(history):
                if isinstance(entry["content"], tuple):  # Check for image in history
                    last_image_path = entry["content"][0]
                    enhanced_image_path = enhance_contrast(Image.open(last_image_path).convert("RGB"))
                    # Return the text message first
                    history.append({"role": "assistant", "content": "Here's your improved image! Let me know if you'd like further adjustments."})
                    # Return the image in a separate message
                    return {"role": "assistant", "content": gr.Image(enhanced_image_path)}
            # No image found in the history, so don't fall through and return None
            response = "I couldn't find an uploaded image to improve. Please upload one first!"
        elif "suggestions" in user_text:
            for entry in reversed(history):
                if isinstance(entry["content"], tuple):  # Check for image in history
                    last_image_path = entry["content"][0]
                    identified_object = identify_objects_with_clip(last_image_path)
                    improvement_suggestions = suggest_improvements_with_chatgpt(identified_object)
                    return {"role": "assistant", "content": improvement_suggestions}
            # No image found in the history to base suggestions on
            response = "Upload an image first and I'll suggest improvements!"
        else:
            response = "Feel free to upload an image or ask for art tips!"
    return {"role": "assistant", "content": response}

# Define the multimodal chatbot interface
demo = gr.ChatInterface(
    fn=diane_multimodal,
    type="messages",
    examples=[
        {"text": "Teach me about digital art tools", "files": []},
        {"text": "What is the best way to do shading digitally?", "files": []},
    ],
    multimodal=True,
    textbox=gr.MultimodalTextbox(file_count="multiple", file_types=["image"]),
    title="Your Digital Art Nice Expert (DIANE)",
    description="Use this chatbot to improve your digital art skills or analyze and enhance image contrast.",
)
demo.launch()
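# Tip: when running locally (outside Spaces), demo.launch(share=True) creates a
# temporary public link, and demo.launch(server_name="0.0.0.0") exposes the app
# on the local network.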