import os
import gradio as gr
from PIL import Image, ImageEnhance
import cv2
from transformers import CLIPProcessor, CLIPModel
import torch
import openai

# Set OpenAI API Key
openai.api_key = os.getenv("OPENAI_API_KEY")

# Load the CLIP model for object identification
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch16")
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch16")
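# CLIP is used zero-shot here: each uploaded image is scored against the fixed label list
# below, and the label with the highest probability is reported back to the user. The first
# run downloads the model weights from the Hugging Face Hub and caches them locally.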

# Predefined object labels for CLIP
object_labels = [
    "cat", "dog", "house", "tree", "car", "mountain", "flower", "bird", "person", "robot",
    "a digital artwork", "a portrait", "a landscape", "a futuristic cityscape"
]

def analyze_contrast_opencv(image_path):
    """Estimate the contrast of the uploaded image as the standard deviation of its grayscale pixel values."""
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        raise ValueError(f"Could not read image at {image_path}")
    contrast = img.std()  # Standard deviation of pixel intensities
    return contrast

def identify_objects_with_clip(image_path):
    """Identify objects in the image using CLIP."""
    image = Image.open(image_path).convert("RGB")
    inputs = clip_processor(text=object_labels, images=image, return_tensors="pt", padding=True)
    
    with torch.no_grad():
        outputs = clip_model(**inputs)
        logits_per_image = outputs.logits_per_image
        probs = logits_per_image.softmax(dim=1).numpy().flatten()
    
    # Get the label with the highest probability
    best_match_idx = probs.argmax()
    best_match_label = object_labels[best_match_idx]
    return best_match_label

def enhance_contrast(image):
    """Enhance the contrast of the image."""
    enhancer = ImageEnhance.Contrast(image)
    enhanced_image = enhancer.enhance(2.0)  # Increase contrast
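    # Note: writing to a fixed filename in the working directory means concurrent users of
    # the app would overwrite each other's output; a per-request temporary file (e.g. via
    # tempfile.NamedTemporaryFile) would avoid that.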
    enhanced_path = "enhanced_image.png"
    enhanced_image.save(enhanced_path)
    return enhanced_path

def suggest_improvements_with_chatgpt(object_name):
    """Generate improvement suggestions for the identified object using ChatGPT."""
    prompt = f"Suggest ways to improve a digital artwork featuring a {object_name}."
    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are a helpful assistant providing expert art improvement suggestions."},
            {"role": "user", "content": prompt}
        ]
    )
    return response["choices"][0]["message"]["content"]

def diane_multimodal(message, history):
    """
    Process both text and image input:
    - Respond to text prompts about digital art.
    - Analyze image contrast, identify objects, and provide feedback.
    - Enhance contrast or suggest improvements when requested.
    """
    response = ""
    num_images = len(message["files"])

    if num_images > 0:
        # Handle uploaded images
        last_image_path = message["files"][-1]
        contrast = analyze_contrast_opencv(last_image_path)
        identified_object = identify_objects_with_clip(last_image_path)

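        # As a rough guide: a perfectly flat image has a grayscale std of 0, while an image
        # split evenly between pure black and pure white approaches ~127.5; 25 is an
        # empirical low-contrast cutoff, not a standard value.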
        if contrast < 25:  # Adjust threshold as needed
            response = (
                f"Hey, great drawing of a {identified_object}! However, it looks like the contrast is too low. "
                "Would you like me to improve it?"
            )
            # Record the image path in the history (as a file tuple) so later turns can retrieve it
            history.append({"role": "assistant", "content": (last_image_path,)})
        else:
            response = (
                f"Hey, great drawing of a {identified_object}! Looks like the color contrast is great, be proud of yourself :)"
            )
        return {"role": "assistant", "content": response}

    elif message["text"]:
        # Handle text-based inputs
        user_text = message["text"].lower()
        if any(keyword in user_text for keyword in ["improve", "yes", "better"]):
            # Look for the most recently stored image in the history
            for entry in reversed(history):
                if isinstance(entry["content"], tuple):  # Image entries are stored as file tuples
                    last_image_path = entry["content"][0]
                    enhanced_image_path = enhance_contrast(Image.open(last_image_path).convert("RGB"))
                    # Append a confirmation message to the history, then return the image separately
                    history.append({"role": "assistant", "content": "Here's your improved image! Let me know if you'd like further adjustments."})
                    return {"role": "assistant", "content": gr.Image(enhanced_image_path)}
            response = "I couldn't find an uploaded image to enhance - please upload one first."
        elif "suggestions" in user_text:
            for entry in reversed(history):
                if isinstance(entry["content"], tuple):  # Check for image in history
                    last_image_path = entry["content"][0]
                    identified_object = identify_objects_with_clip(last_image_path)
                    improvement_suggestions = suggest_improvements_with_chatgpt(identified_object)
                    return {"role": "assistant", "content": improvement_suggestions}
        else:
            response = "Feel free to upload an image or ask for art tips!"
    
    return {"role": "assistant", "content": response}


# Define the multimodal chatbot interface
demo = gr.ChatInterface(
    fn=diane_multimodal,
    type="messages",
    examples=[
        {"text": "Teach me about digital art tools", "files": []},
        {"text": "What is the best way to do shading digitally?", "files": []},
    ],
    multimodal=True,
    textbox=gr.MultimodalTextbox(file_count="multiple", file_types=["image"]),
    title="Your Digital Art Nice Expert (DIANE)",
    description="Use this chatbot to improve your digital art skills or analyze and enhance image contrast.",
)

demo.launch()
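
# To run locally (assuming this file is saved as app.py and that gradio, opencv-python,
# transformers, torch, pillow, and an openai<1.0 SDK are installed):
#   export OPENAI_API_KEY=sk-...
#   python app.py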