Spaces:

victorgg
/

gemini_2.0_image_gen

Running

App Files Files Community

victorgg committed on Mar 27

Commit

11d4840

verified ·

1 Parent(s): 55d2024

Create app.py

Browse files

Files changed (1) hide show

app.py +199 -0

app.py ADDED Viewed

	@@ -0,0 +1,199 @@

+import json
+import os
+import time
+import uuid
+import tempfile
+from PIL import Image
+import gradio as gr
+import base64
+import mimetypes
+import logging
+from google import genai
+from google.genai import types
+# Configure logging
+logging.basicConfig(level=logging.DEBUG,
+                    format='%(asctime)s - %(levelname)s - %(message)s')
+logger = logging.getLogger(__name__)
+def save_binary_file(file_name, data):
+    logger.debug(f"Saving binary data to file: {file_name}")
+    with open(file_name, "wb") as f:
+        f.write(data)
+    logger.debug(f"File saved successfully: {file_name}")
+def generate(text, file_name, api_key, model="gemini-2.0-flash-exp-image-generation"):
+    logger.debug(f"Starting generate function with text: '{text}', file_name: '{file_name}', model: '{model}'")
+    try:
+        # Initialize client
+        effective_api_key = api_key.strip() if api_key and api_key.strip() != "" else os.environ.get("GEMINI_API_KEY")
+        logger.debug(f"Using API Key: {'Provided' if api_key.strip() else 'From Environment Variable'}")
+        if not effective_api_key:
+            logger.error("No API key provided or found in environment variable.")
+            raise ValueError("API key is required.")
+        client = genai.Client(api_key=effective_api_key)
+        logger.debug("Gemini client initialized.")
+        files = [
+            client.files.upload(file=file_name),
+        ]
+        logger.debug(f"File uploaded. URI: {files[0].uri}, MIME Type: {files[0].mime_type}")
+        contents = [
+            types.Content(
+                role="user",
+                parts=[
+                    types.Part.from_uri(
+                        file_uri=files[0].uri,
+                        mime_type=files[0].mime_type,
+                    ),
+                    types.Part.from_text(text=text),
+                ],
+            ),
+        ]
+        logger.debug(f"Content object created: {contents}")
+        generate_content_config = types.GenerateContentConfig(
+            temperature=1,
+            top_p=0.95,
+            top_k=40,
+            max_output_tokens=8192,
+            response_modalities=[
+                "image",
+                "text",
+            ],
+            response_mime_type="text/plain",
+        )
+        logger.debug(f"Generate content config: {generate_content_config}")
+        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
+            temp_path = tmp.name
+            logger.debug(f"Temporary file created: {temp_path}")
+            response_stream = client.models.generate_content_stream(
+                model=model,
+                contents=contents,
+                config=generate_content_config,
+            )
+            logger.debug("Starting to process response stream...")
+            for chunk in response_stream:
+                if not chunk.candidates or not chunk.candidates[0].content or not chunk.candidates[0].content.parts:
+                    logger.warning("Chunk has no candidates, content, or parts. Skipping.")
+                    continue
+                inline_data = chunk.candidates[0].content.parts[0].inline_data
+                if inline_data:
+                    save_binary_file(temp_path, inline_data.data)
+                    logger.info(f"File of mime type {inline_data.mime_type} saved to: {temp_path} and prompt input :{text}")
+                else:
+                    logger.info(f"Received text: {chunk.text}")
+                    print(chunk.text) # Keep the print for immediate console output
+                # Log the raw chunk for deeper inspection
+                logger.debug(f"Raw chunk: {chunk}")
+        del files
+        logger.debug("Uploaded files deleted.")
+        return temp_path
+    except Exception as e:
+        logger.exception("An error occurred during generation:")  # This will log the full traceback
+        return None # Return None when error happens
+def process_image_and_prompt(composite_pil, prompt, gemini_api_key):
+    logger.debug(f"Starting process_image_and_prompt with prompt: '{prompt}'")
+    try:
+        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
+            composite_path = tmp.name
+            composite_pil.save(composite_path)
+            logger.debug(f"Composite image saved to: {composite_path}")
+        file_name = composite_path
+        input_text = prompt
+        model = "gemini-2.0-flash-exp-image-generation"  #  Consider changing this to "gemini-pro-vision"
+        gemma_edited_image_path = generate(text=input_text, file_name=file_name, api_key=gemini_api_key, model=model)
+        if gemma_edited_image_path: # Check none or not
+            logger.debug(f"Image generated at path: {gemma_edited_image_path}")
+            result_img = Image.open(gemma_edited_image_path)
+            if result_img.mode == "RGBA":
+                result_img = result_img.convert("RGB")
+            return [result_img]
+        else:
+            logger.error("generate function returned None.")
+            return []  # Return empty when error
+    except Exception as e:
+        logger.exception("Error occurred in process_image_and_prompt")
+        return []  # Return empty when error
+# --- Gradio Interface ---
+with gr.Blocks() as demo:
+    gr.HTML(
+        """
+        <div style='display: flex; align-items: center; justify-content: center; gap: 20px'>
+        <div style="background-color: var(--block-background-fill); border-radius: 8px">
+            <img src="https://www.gstatic.com/lamda/images/gemini_favicon_f069958c85030456e93de685481c559f160ea06b.png" style="width: 100px; height: 100px;">
+        </div>
+        <div>
+            <h1></h1>
+            <p>ပုံရိပ်တည်းဖြတ်ရန် Gemini</p>
+            <p>API Key ကို <a href="https://aistudio.google.com/apikey">ဤနေရာ</a> တွင် ဖန်တီးပါ</p>
+        </div>
+        </div>
+        """
+    )
+    gr.Markdown("ပုံတစ်ပုံ တင်ပြီး ပုံကိုတည်းဖြတ်ရန် သင်လိုချင်တာကို ရိုက်ထည့်ပါ။")
+    with gr.Row():
+        with gr.Column():
+            image_input = gr.Image(type="pil", label="ပုံတင်ရန်", image_mode="RGBA")
+            gemini_api_key = gr.Textbox(
+                lines=1,
+                placeholder="Gemini API Key ထည့်ပါ",
+                label="Gemini API Key",
+                type="password"
+            )
+            prompt_input = gr.Textbox(
+                lines=2,
+                placeholder="သင်လိုချင်တာကို ဤနေရာတွင် ရိုက်ထည့်ပါ...",
+                label="သင်လိုချင်တာ"
+            )
+            submit_btn = gr.Button("ထုတ်လုပ်ပါ")
+        with gr.Column():
+            output_gallery = gr.Gallery(label="ထုတ်လုပ်ပြီးရလဒ်များ")
+    submit_btn.click(
+        fn=process_image_and_prompt,
+        inputs=[image_input, prompt_input, gemini_api_key],
+        outputs=output_gallery,
+    )
+    # --- Test Code ---
+    # Create a dummy image (replace with your actual image if needed)
+    dummy_image = Image.new("RGBA", (100, 100), color="red")
+    dummy_prompt = "Make the image blue"
+    dummy_api_key = os.environ.get("GEMINI_API_KEY")  # Or put a placeholder key here for testing
+    # Call the function directly
+    logger.info("Calling process_image_and_prompt directly...")
+    result = process_image_and_prompt(dummy_image, dummy_prompt, dummy_api_key)
+    if result:
+        logger.info(f"Direct call successful. Result: {result}")
+        # result[0].show() # Uncomment to display image if running locally
+    else:
+        logger.error("Direct call failed.")