Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,132 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import gradio as gr
|
3 |
+
from PIL import Image, ImageEnhance
|
4 |
+
import cv2
|
5 |
+
from transformers import CLIPProcessor, CLIPModel
|
6 |
+
import torch
|
7 |
+
import openai
|
8 |
+
|
9 |
+
# The OpenAI key is taken from the environment (None when the variable is unset).
openai.api_key = os.environ.get("OPENAI_API_KEY")

# Checkpoint name shared by the CLIP model and its processor, which are used
# for object identification.
_CLIP_CHECKPOINT = "openai/clip-vit-base-patch16"
clip_model = CLIPModel.from_pretrained(_CLIP_CHECKPOINT)
clip_processor = CLIPProcessor.from_pretrained(_CLIP_CHECKPOINT)

# Candidate text labels that CLIP scores against an uploaded image.
object_labels = [
    "cat",
    "dog",
    "house",
    "tree",
    "car",
    "mountain",
    "flower",
    "bird",
    "person",
    "robot",
    "a digital artwork",
    "a portrait",
    "a landscape",
    "a futuristic cityscape",
]
|
21 |
+
|
22 |
+
def analyze_contrast_opencv(image_path):
    """Measure the contrast of an image file using OpenCV.

    The file is loaded as a grayscale image and the standard deviation of
    its pixel intensities is used as the contrast score.

    Args:
        image_path: Path of the image file to analyze.

    Returns:
        The standard deviation of the grayscale pixel intensities, as a float.

    Raises:
        ValueError: If OpenCV could not read or decode the file.
    """
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    # cv2.imread reports failure by returning None rather than raising, which
    # would otherwise surface later as an opaque AttributeError on .std().
    if img is None:
        raise ValueError(f"Could not read image for contrast analysis: {image_path!r}")
    return float(img.std())
|
27 |
+
|
28 |
+
def identify_objects_with_clip(image_path):
    """Pick the label from ``object_labels`` that CLIP scores highest for an image.

    Args:
        image_path: Path of the image file to classify.

    Returns:
        The entry of ``object_labels`` with the highest softmax probability.
    """
    # Open inside a context manager so the underlying file handle is released
    # once the pixel data has been converted to an in-memory RGB image.
    with Image.open(image_path) as source:
        image = source.convert("RGB")

    inputs = clip_processor(text=object_labels, images=image, return_tensors="pt", padding=True)

    # Inference only: gradients are not needed.
    with torch.no_grad():
        outputs = clip_model(**inputs)

    # Softmax over dim=1, then take the flat index of the largest value.
    # A single image is passed in, so that index is the position of the
    # winning label. Converted to a plain int before indexing the list.
    probs = outputs.logits_per_image.softmax(dim=1)
    best_match_idx = int(probs.argmax())
    return object_labels[best_match_idx]
|
42 |
+
|
43 |
+
def enhance_contrast(image, factor=2.0, output_path="enhanced_image.png"):
    """Apply a contrast enhancement to an image and save the result to disk.

    Args:
        image: PIL image to enhance.
        factor: Enhancement factor passed to ``ImageEnhance.Contrast.enhance``.
            Defaults to 2.0, the value this function always used before the
            parameter existed.
        output_path: Where the enhanced image is written. Defaults to
            "enhanced_image.png". Every call that uses the default writes to
            the same file, so an earlier result is overwritten.

    Returns:
        The path the enhanced image was saved to (``output_path``).
    """
    enhanced_image = ImageEnhance.Contrast(image).enhance(factor)
    enhanced_image.save(output_path)
    return output_path
|
50 |
+
|
51 |
+
def suggest_improvements_with_chatgpt(object_name):
    """Ask the OpenAI chat API for ways to improve an artwork of ``object_name``.

    Works with both generations of the ``openai`` package: the 1.x
    module-level client when it is available, otherwise the legacy
    ``openai.ChatCompletion`` interface.

    Args:
        object_name: Name of the subject identified in the artwork; it is
            interpolated into the user prompt.

    Returns:
        The text content of the first choice in the model's reply.
    """
    prompt = f"Suggest ways to improve a digital artwork featuring a {object_name}."
    messages = [
        {"role": "system", "content": "You are a helpful assistant providing expert art improvement suggestions."},
        {"role": "user", "content": prompt},
    ]

    # openai>=1.0 exposes the OpenAI client class and removed the
    # openai.ChatCompletion interface, so the presence of that class is used
    # to select the call style. The module-level client still reads
    # openai.api_key, which is set at import time.
    if hasattr(openai, "OpenAI"):
        response = openai.chat.completions.create(model="gpt-4", messages=messages)
        return response.choices[0].message.content

    # Legacy (pre-1.0) interface.
    response = openai.ChatCompletion.create(model="gpt-4", messages=messages)
    return response["choices"][0]["message"]["content"]
|
62 |
+
|
63 |
+
def _find_last_image_path(history):
    """Return the most recent image path recorded in ``history``, or None.

    An entry counts as an image when its "content" value is a non-empty
    tuple; the first item of that tuple is taken to be the path.
    """
    for entry in reversed(history):
        content = entry["content"]
        if isinstance(content, tuple) and content:
            return content[0]
    return None


def diane_multimodal(message, history):
    """Chat handler that processes both text and image input.

    Behavior:
    - If files are attached, the last one is analyzed for contrast and
      classified with CLIP, and feedback about it is returned. The message
      text is ignored in that case.
    - If the text contains "improve", "yes" or "better", the most recent
      image found in ``history`` is contrast-enhanced and returned.
    - If the text contains "suggestions", improvement suggestions for the
      object in the most recent image are requested from ChatGPT.
    - Otherwise a generic hint is returned.

    Args:
        message: Dict with a "text" string and a "files" list of image paths.
        history: List of message dicts with "role" and "content" keys. This
            function appends to it in place.

    Returns:
        A dict ``{"role": "assistant", "content": ...}`` whose content is
        either a string or a ``gr.Image``.
    """
    default_reply = "Feel free to upload an image or ask for art tips!"
    no_image_reply = "I couldn't find an image in our conversation yet. Please upload one first!"

    files = message.get("files") or []
    if files:
        # Only the most recently attached image is analyzed.
        last_image_path = files[-1]
        contrast = analyze_contrast_opencv(last_image_path)
        identified_object = identify_objects_with_clip(last_image_path)

        if contrast < 25:  # Adjust threshold as needed
            # Record the analyzed image path so a follow-up request can find it.
            # NOTE(review): whether entries appended to `history` here persist
            # across turns depends on Gradio's ChatInterface -- confirm.
            history.append({"role": "assistant", "content": (last_image_path,)})
            response = (
                f"Hey, great drawing of a {identified_object}! However, it looks like the contrast is too low. "
                "Would you like me to improve it?"
            )
        else:
            response = (
                f"Hey, great drawing of a {identified_object}! Looks like the color contrast is great, be proud of yourself :)"
            )
        return {"role": "assistant", "content": response}

    user_text = (message.get("text") or "").lower()
    if not user_text:
        # Nothing to act on: reply with the hint rather than an empty message.
        return {"role": "assistant", "content": default_reply}

    if any(keyword in user_text for keyword in ("improve", "yes", "better")):
        last_image_path = _find_last_image_path(history)
        if last_image_path is None:
            # Previously this fell through and produced an empty reply.
            return {"role": "assistant", "content": no_image_reply}

        # Close the source file once the enhanced copy has been written.
        with Image.open(last_image_path) as source:
            enhanced_image_path = enhance_contrast(source.convert("RGB"))

        # The caption is appended to the history list; the image itself is
        # the value returned as this turn's reply.
        history.append({"role": "assistant", "content": "Here's your improved image! Let me know if you'd like further adjustments."})
        return {"role": "assistant", "content": gr.Image(enhanced_image_path)}

    if "suggestions" in user_text:
        last_image_path = _find_last_image_path(history)
        if last_image_path is None:
            # Previously this fell through and produced an empty reply.
            return {"role": "assistant", "content": no_image_reply}

        identified_object = identify_objects_with_clip(last_image_path)
        improvement_suggestions = suggest_improvements_with_chatgpt(identified_object)
        return {"role": "assistant", "content": improvement_suggestions}

    return {"role": "assistant", "content": default_reply}
|
116 |
+
|
117 |
+
|
118 |
+
# Build and start the multimodal chatbot UI.

# Text-only example prompts shown in the interface (no files attached).
_example_prompts = [
    {"text": "Teach me about digital art tools", "files": []},
    {"text": "What is the best way to do shading digitally?", "files": []},
]

# Input box configured for text plus multiple image uploads.
_chat_input = gr.MultimodalTextbox(file_count="multiple", file_types=["image"])

demo = gr.ChatInterface(
    fn=diane_multimodal,
    type="messages",
    multimodal=True,
    textbox=_chat_input,
    examples=_example_prompts,
    title="Your Digital Art Nice Expert (DIANE)",
    description="Use this chatbot to improve your digital art skills or analyze and enhance image contrast.",
)

demo.launch()
|