oscurantismo committed on
Commit a2ff906 · verified · 1 Parent(s): d425a4e

Create app.py

Files changed (1)
  1. app.py +132 -0
app.py ADDED
@@ -0,0 +1,132 @@
+ import os
+ import gradio as gr
+ from PIL import Image, ImageEnhance
+ import cv2
+ from transformers import CLIPProcessor, CLIPModel
+ import torch
+ import openai
+
+ # Set OpenAI API Key
+ openai.api_key = os.getenv("OPENAI_API_KEY")
+
+ # Load the CLIP model for object identification
+ clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch16")
+ clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch16")
+
+ # Predefined object labels for CLIP
+ object_labels = [
+     "cat", "dog", "house", "tree", "car", "mountain", "flower", "bird", "person", "robot",
+     "a digital artwork", "a portrait", "a landscape", "a futuristic cityscape"
+ ]
+
+ def analyze_contrast_opencv(image_path):
+     """Analyze the contrast of the uploaded image using OpenCV."""
+     img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
+     contrast = img.std()  # Standard deviation of pixel intensities
+     return contrast
+
+ def identify_objects_with_clip(image_path):
+     """Identify objects in the image using CLIP."""
+     image = Image.open(image_path).convert("RGB")
+     inputs = clip_processor(text=object_labels, images=image, return_tensors="pt", padding=True)
+
+     with torch.no_grad():
+         outputs = clip_model(**inputs)
+         logits_per_image = outputs.logits_per_image
+         probs = logits_per_image.softmax(dim=1).numpy().flatten()
+
+     # Get the label with the highest probability
+     best_match_idx = probs.argmax()
+     best_match_label = object_labels[best_match_idx]
+     return best_match_label
+
+ def enhance_contrast(image):
+     """Enhance the contrast of the image."""
+     enhancer = ImageEnhance.Contrast(image)
+     enhanced_image = enhancer.enhance(2.0)  # Increase contrast
+     enhanced_path = "enhanced_image.png"
+     enhanced_image.save(enhanced_path)
+     return enhanced_path
+
+ def suggest_improvements_with_chatgpt(object_name):
+     """Generate improvement suggestions for the identified object using ChatGPT."""
+     prompt = f"Suggest ways to improve a digital artwork featuring a {object_name}."
+     response = openai.ChatCompletion.create(
+         model="gpt-4",
+         messages=[
+             {"role": "system", "content": "You are a helpful assistant providing expert art improvement suggestions."},
+             {"role": "user", "content": prompt}
+         ]
+     )
+     return response["choices"][0]["message"]["content"]
+
+ def diane_multimodal(message, history):
+     """
+     Process both text and image input:
+     - Respond to text prompts about digital art.
+     - Analyze image contrast, identify objects, and provide feedback.
+     - Enhance contrast or suggest improvements when requested.
+     """
+     response = ""
+     num_images = len(message["files"])
+
+     if num_images > 0:
+         # Handle uploaded images
+         last_image_path = message["files"][-1]
+         contrast = analyze_contrast_opencv(last_image_path)
+         identified_object = identify_objects_with_clip(last_image_path)
+
+         if contrast < 25:  # Adjust threshold as needed
+             response = (
+                 f"Hey, great drawing of a {identified_object}! However, it looks like the contrast is too low. "
+                 "Would you like me to improve it?"
+             )
+             # Save the analyzed image path to the history
+             history.append({"role": "assistant", "content": (last_image_path,)})
+         else:
+             response = (
+                 f"Hey, great drawing of a {identified_object}! Looks like the color contrast is great, be proud of yourself :)"
+             )
+         return {"role": "assistant", "content": response}
+
+     elif message["text"]:
+         # Handle text-based inputs
+         user_text = message["text"].lower()
+         if any(keyword in user_text for keyword in ["improve", "yes", "better"]):
+             # Check if an image was previously uploaded
+             for entry in reversed(history):
+                 if isinstance(entry["content"], tuple):  # Check for image in history
+                     last_image_path = entry["content"][0]
+                     enhanced_image_path = enhance_contrast(Image.open(last_image_path).convert("RGB"))
+                     # Return the text message first
+                     history.append({"role": "assistant", "content": "Here's your improved image! Let me know if you'd like further adjustments."})
+                     # Return the image in a separate message
+                     return {"role": "assistant", "content": gr.Image(enhanced_image_path)}
+         elif "suggestions" in user_text:
+             for entry in reversed(history):
+                 if isinstance(entry["content"], tuple):  # Check for image in history
+                     last_image_path = entry["content"][0]
+                     identified_object = identify_objects_with_clip(last_image_path)
+                     improvement_suggestions = suggest_improvements_with_chatgpt(identified_object)
+                     return {"role": "assistant", "content": improvement_suggestions}
+         else:
+             response = "Feel free to upload an image or ask for art tips!"
+
+     return {"role": "assistant", "content": response}
+
+
+ # Define the multimodal chatbot interface
+ demo = gr.ChatInterface(
+     fn=diane_multimodal,
+     type="messages",
+     examples=[
+         {"text": "Teach me about digital art tools", "files": []},
+         {"text": "What is the best way to do shading digitally?", "files": []},
+     ],
+     multimodal=True,
+     textbox=gr.MultimodalTextbox(file_count="multiple", file_types=["image"]),
+     title="Your Digital Art Nice Expert (DIANE)",
+     description="Use this chatbot to improve your digital art skills or analyze and enhance image contrast.",
+ )
+
+ demo.launch()
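
One sharp edge in analyze_contrast_opencv: cv2.imread returns None instead of raising when it cannot decode a file, so img.std() would fail with an AttributeError on an unreadable upload. A defensive variant, purely illustrative and not part of this commit:

import cv2

def analyze_contrast_safe(image_path):
    """Like analyze_contrast_opencv, but fails loudly on unreadable files."""
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if img is None:  # cv2.imread signals failure with None, not an exception
        raise ValueError(f"OpenCV could not read image: {image_path}")
    return img.std()  # Standard deviation of intensities as a contrast proxy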
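
Note on the OpenAI call: openai.ChatCompletion.create is the legacy pre-1.0 interface and raises APIRemovedInV1 under openai>=1.0, so this app needs openai<1.0 pinned in its requirements. With the current client API, the same request would look roughly like the sketch below; the suggest_improvements_v1 name is illustrative, not part of this commit:

import os
from openai import OpenAI

client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))  # openai>=1.0 client

def suggest_improvements_v1(object_name):
    """Same prompt as suggest_improvements_with_chatgpt, via the v1 client."""
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are a helpful assistant providing expert art improvement suggestions."},
            {"role": "user", "content": f"Suggest ways to improve a digital artwork featuring a {object_name}."},
        ],
    )
    return response.choices[0].message.content

If the requirements pin openai<1.0 instead, app.py runs unchanged and this sketch is unnecessary.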