import os
import gradio as gr
from PIL import Image, ImageEnhance
import cv2
from transformers import CLIPProcessor, CLIPModel
import torch
import openai
# Set OpenAI API Key
openai.api_key = os.getenv("OPENAI_API_KEY")
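# NOTE: os.getenv returns None when OPENAI_API_KEY is unset; the app will still
# start, but suggest_improvements_with_chatgpt will fail at request time.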
# Load the CLIP model for object identification
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch16")
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch16")
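# NOTE: CLIP is used here as a zero-shot classifier: the uploaded image is scored
# against the candidate labels below, with no task-specific training. The lighter
# "openai/clip-vit-base-patch32" checkpoint is a drop-in alternative if memory is tight.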
# Predefined object labels for CLIP
object_labels = [
    "cat", "dog", "house", "tree", "car", "mountain", "flower", "bird", "person", "robot",
    "a digital artwork", "a portrait", "a landscape", "a futuristic cityscape",
]

def analyze_contrast_opencv(image_path):
    """Analyze the contrast of the uploaded image using OpenCV."""
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    contrast = img.std()  # Standard deviation of pixel intensities
    return contrast
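# A low standard deviation means the pixel intensities cluster together (a flat,
# washed-out look); diane_multimodal below treats values under 25 as low contrast.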

def identify_objects_with_clip(image_path):
    """Identify objects in the image using CLIP."""
    image = Image.open(image_path).convert("RGB")
    inputs = clip_processor(text=object_labels, images=image, return_tensors="pt", padding=True)
    with torch.no_grad():
        outputs = clip_model(**inputs)
        logits_per_image = outputs.logits_per_image
        probs = logits_per_image.softmax(dim=1).numpy().flatten()
    # Get the label with the highest probability
    best_match_idx = probs.argmax()
    best_match_label = object_labels[best_match_idx]
    return best_match_label
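# Caveat: the softmax is computed over the candidate labels only, so the "best
# match" is always one of object_labels, even when none of them truly fits.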

def enhance_contrast(image):
    """Enhance the contrast of the image."""
    enhancer = ImageEnhance.Contrast(image)
    enhanced_image = enhancer.enhance(2.0)  # Increase contrast
    enhanced_path = "enhanced_image.png"
    enhanced_image.save(enhanced_path)
    return enhanced_path
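
# OPTIONAL (sketch, not wired into the app): a more adaptive alternative to the
# fixed 2.0 contrast factor above. CLAHE (Contrast Limited Adaptive Histogram
# Equalization) from OpenCV boosts local contrast and often handles unevenly lit
# drawings better. The name and defaults here are illustrative, not part of the app.
def enhance_contrast_clahe(image_path, output_path="enhanced_image_clahe.png"):
    """Sketch: enhance local contrast by applying CLAHE to the L channel in LAB space."""
    img = cv2.imread(image_path)
    lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
    l_channel, a_channel, b_channel = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    enhanced = cv2.merge((clahe.apply(l_channel), a_channel, b_channel))
    cv2.imwrite(output_path, cv2.cvtColor(enhanced, cv2.COLOR_LAB2BGR))
    return output_path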

def suggest_improvements_with_chatgpt(object_name):
    """Generate improvement suggestions for the identified object using ChatGPT."""
    prompt = f"Suggest ways to improve a digital artwork featuring a {object_name}."
    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are a helpful assistant providing expert art improvement suggestions."},
            {"role": "user", "content": prompt},
        ],
    )
    return response["choices"][0]["message"]["content"]
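
# NOTE (sketch): the call above uses the legacy openai<1.0 SDK. With openai>=1.0
# the equivalent is roughly:
#     from openai import OpenAI
#     client = OpenAI()  # reads OPENAI_API_KEY from the environment
#     response = client.chat.completions.create(model="gpt-4", messages=[...])
#     return response.choices[0].message.content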

def diane_multimodal(message, history):
    """
    Process both text and image input:
    - Respond to text prompts about digital art.
    - Analyze image contrast, identify objects, and provide feedback.
    - Enhance contrast or suggest improvements when requested.

    `message` arrives from the multimodal textbox as {"text": str, "files": [paths]};
    `history` is a list of {"role", "content"} dicts (content is a tuple for files).
    """
    response = ""
    num_images = len(message["files"])
    if num_images > 0:
        # Handle uploaded images
        last_image_path = message["files"][-1]
        contrast = analyze_contrast_opencv(last_image_path)
        identified_object = identify_objects_with_clip(last_image_path)
        if contrast < 25:  # Adjust threshold as needed
            response = (
                f"Hey, great drawing of a {identified_object}! However, it looks like the contrast is too low. "
                "Would you like me to improve it?"
            )
            # Save the analyzed image path to the history
            history.append({"role": "assistant", "content": (last_image_path,)})
        else:
            response = (
                f"Hey, great drawing of a {identified_object}! Looks like the color contrast is great, be proud of yourself :)"
            )
        return {"role": "assistant", "content": response}
    elif message["text"]:
        # Handle text-based inputs
        user_text = message["text"].lower()
        if any(keyword in user_text for keyword in ["improve", "yes", "better"]):
            # Check if an image was previously uploaded
            for entry in reversed(history):
                if isinstance(entry["content"], tuple):  # Check for image in history
                    last_image_path = entry["content"][0]
                    enhanced_image_path = enhance_contrast(Image.open(last_image_path).convert("RGB"))
                    # Return the text message first
                    history.append({"role": "assistant", "content": "Here's your improved image! Let me know if you'd like further adjustments."})
                    # Return the image in a separate message
                    return {"role": "assistant", "content": gr.Image(enhanced_image_path)}
            # No image found in the history, so don't fall through and return None
            response = "I couldn't find an uploaded image to improve. Please upload one first!"
        elif "suggestions" in user_text:
            for entry in reversed(history):
                if isinstance(entry["content"], tuple):  # Check for image in history
                    last_image_path = entry["content"][0]
                    identified_object = identify_objects_with_clip(last_image_path)
                    improvement_suggestions = suggest_improvements_with_chatgpt(identified_object)
                    return {"role": "assistant", "content": improvement_suggestions}
            # No image found in the history to base suggestions on
            response = "Upload an image first and I'll suggest improvements!"
        else:
            response = "Feel free to upload an image or ask for art tips!"
    return {"role": "assistant", "content": response}

# Define the multimodal chatbot interface
demo = gr.ChatInterface(
    fn=diane_multimodal,
    type="messages",
    examples=[
        {"text": "Teach me about digital art tools", "files": []},
        {"text": "What is the best way to do shading digitally?", "files": []},
    ],
    multimodal=True,
    textbox=gr.MultimodalTextbox(file_count="multiple", file_types=["image"]),
    title="Your Digital Art Nice Expert (DIANE)",
    description="Use this chatbot to improve your digital art skills or analyze and enhance image contrast.",
)
demo.launch()
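# Tip: when running locally (outside Spaces), demo.launch(share=True) creates a
# temporary public link, and demo.launch(server_name="0.0.0.0") exposes the app
# on the local network.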