Talk-to-Draw / app.py
aicodingfun's picture
Update app.py
d96b3d4 verified
raw
history blame
3.16 kB
import gradio as gr
from google import genai
from google.genai import types
import os
from PIL import Image
import io
# Read the Gemini API key from the environment (None when the variable is
# unset) and build the module-level client used for every generation request.
API_KEY = os.getenv("GOOGLE_API_KEY")
client = genai.Client(api_key=API_KEY)
def generate_image(description: str, style: str) -> Image.Image | None:
    """Generate an illustration with Gemini 2.0 Flash from a description and style.

    Args:
        description: Free-text description of the picture to draw. Empty,
            whitespace-only, or ``None`` values are rejected before any API
            call is made.
        style: Name of the artistic style, interpolated into the prompt as-is.

    Returns:
        The first image found in the model response, scaled so that its longer
        side is 512 pixels (aspect ratio preserved), or ``None`` when the
        description is empty, the response carries no image, or any error
        occurs while calling the API or decoding the image.
    """
    # Reject blank input up front so no API request is spent on an empty prompt.
    if not description or not description.strip():
        print("Description is empty. Cannot generate image.")
        return None

    # Longer side of the returned image, in pixels.
    max_side = 512

    # Construct the prompt for the image generation model. The literal is kept
    # flush-left so that no source indentation leaks into the prompt text.
    image_prompt = f"""
Please create an illustration based on the following description and style.
- Description: {description}
- Style: {style}
- Format: High resolution, detailed image.
- Aspect Ratio: Square (1:1) is preferred, but follow the description if it implies a different ratio.
- **DO NOT** include any text in the image.
"""
    print(f"Generating image with prompt: {image_prompt}")

    try:
        # Call the Gemini API, asking for both text and image modalities.
        image_response = client.models.generate_content(
            model="gemini-2.0-flash-preview-image-generation",
            contents=image_prompt,
            config=types.GenerateContentConfig(
                response_modalities=['TEXT', 'IMAGE'],
            ),
        )

        # Walk down to the parts of the first candidate. Each level may be
        # missing, so check them one by one rather than letting an
        # AttributeError be reported as a generic failure.
        candidates = image_response.candidates if image_response else None
        content = candidates[0].content if candidates else None
        parts = content.parts if content else None
        if not parts:
            print("Image generation response is empty or malformed.")
            return None

        for part in parts:
            blob = part.inline_data
            # Skip text parts and any inline payload that is not an image.
            if not (blob and blob.data and (blob.mime_type or "").startswith('image/')):
                continue

            # The payload is handed to BytesIO directly, i.e. as raw image bytes.
            img = Image.open(io.BytesIO(blob.data))

            # Scale the longer side to max_side while keeping the aspect ratio;
            # the prompt allows non-square output, which a fixed square resize
            # would stretch. Square images still end up max_side x max_side.
            scale = max_side / max(img.size)
            target_size = (
                max(1, round(img.width * scale)),
                max(1, round(img.height * scale)),
            )
            img = img.resize(target_size, Image.Resampling.LANCZOS)
            print("Image generated successfully.")
            return img

        print("Image data not found in any response parts or not an image.")
        return None
    except Exception as e:
        # UI callback boundary: log the failure and return None instead of
        # propagating the exception to the caller.
        print(f"Error generating image: {e}")
        return None
# Style choices offered in the dropdown, each labelled "<English> - <Chinese>".
styles = [
    f"{english} - {chinese}"
    for english, chinese in (
        ("Sketch", "草圖"),
        ("Watercolor", "水彩"),
        ("Oil Painting", "油畫"),
        ("Digital Art", "數位藝術"),
        ("Cartoon", "卡通"),
        ("Photorealistic", "寫實"),
        ("Abstract", "抽象派"),
        ("Pixel Art", "像素藝術"),
        ("Anime", "動畫"),
        ("Impressionist", "印象派"),
    )
]
# Build the Gradio UI: a free-text description and a style picker go in,
# a single generated image comes out.
description_input = gr.Textbox(
    label="✏️ Enter image description",
    lines=3,
    interactive=True,
)
style_input = gr.Dropdown(
    choices=styles,
    label="🎨 Select Style",
    value="Sketch - 草圖",  # Style selected by default
)
image_output = gr.Image(label="Generated Image")

app = gr.Interface(
    fn=generate_image,
    inputs=[description_input, style_input],
    outputs=image_output,
    title="🤖 Talk to Draw! 🎨",
    description="Collaborate with AI to draw images from text or voice descriptions with different styles, powered by Google Gemini 2.0 Flash.",
    flagging_mode="never",  # Turn the flagging feature off
)

# Start the web server for the interface.
app.launch()