victorgg committed on
Commit
11d4840
·
verified ·
1 Parent(s): 55d2024

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +199 -0
app.py ADDED
@@ -0,0 +1,199 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import time
4
+ import uuid
5
+ import tempfile
6
+ from PIL import Image
7
+ import gradio as gr
8
+ import base64
9
+ import mimetypes
10
+ import logging
11
+
12
+ from google import genai
13
+ from google.genai import types
14
+
15
# Logging setup: the root logger emits DEBUG and above, one
# "timestamp - level - message" line per record.
_LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
logging.basicConfig(format=_LOG_FORMAT, level=logging.DEBUG)

# Module-level logger used by the functions defined below.
logger = logging.getLogger(__name__)
19
+
20
+
21
def save_binary_file(file_name, data):
    """Write ``data`` to ``file_name`` in binary mode, overwriting any existing file.

    Args:
        file_name: Destination path.
        data: Bytes-like payload to write.
    """
    logger.debug(f"Saving binary data to file: {file_name}")
    with open(file_name, mode="wb") as out_file:
        out_file.write(data)
    logger.debug(f"File saved successfully: {file_name}")
26
+
27
+
28
def generate(text, file_name, api_key, model="gemini-2.0-flash-exp-image-generation"):
    """Send an image plus an edit prompt to Gemini and save the image it returns.

    Args:
        text: Prompt describing the desired edit.
        file_name: Path of the local image file uploaded alongside the prompt.
        api_key: Gemini API key. When ``None`` or blank, the ``GEMINI_API_KEY``
            environment variable is used instead.
        model: Name of the Gemini model to call.

    Returns:
        Path of a temporary ``.png`` file holding the last image received from
        the model, or ``None`` when no API key is available, the request
        fails, or the response contains no image data. The caller owns the
        returned file and is responsible for removing it.
    """
    logger.debug(f"Starting generate function with text: '{text}', file_name: '{file_name}', model: '{model}'")

    client = None
    uploaded = None
    temp_path = None
    result_path = None  # set only once an image has been written successfully
    try:
        # Tolerate api_key=None: fall back to the environment instead of
        # crashing on ``None.strip()``.
        provided_key = (api_key or "").strip()
        effective_api_key = provided_key or os.environ.get("GEMINI_API_KEY")
        logger.debug(f"Using API Key: {'Provided' if provided_key else 'From Environment Variable'}")

        if not effective_api_key:
            logger.error("No API key provided or found in environment variable.")
            return None

        client = genai.Client(api_key=effective_api_key)
        logger.debug("Gemini client initialized.")

        uploaded = client.files.upload(file=file_name)
        logger.debug(f"File uploaded. URI: {uploaded.uri}, MIME Type: {uploaded.mime_type}")

        contents = [
            types.Content(
                role="user",
                parts=[
                    types.Part.from_uri(
                        file_uri=uploaded.uri,
                        mime_type=uploaded.mime_type,
                    ),
                    types.Part.from_text(text=text),
                ],
            ),
        ]
        logger.debug(f"Content object created: {contents}")

        generate_content_config = types.GenerateContentConfig(
            temperature=1,
            top_p=0.95,
            top_k=40,
            max_output_tokens=8192,
            response_modalities=[
                "image",
                "text",
            ],
            response_mime_type="text/plain",
        )
        logger.debug(f"Generate content config: {generate_content_config}")

        # Reserve a path only; the handle is closed right away and the image
        # bytes are written later by save_binary_file.
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
            temp_path = tmp.name
        logger.debug(f"Temporary file created: {temp_path}")

        response_stream = client.models.generate_content_stream(
            model=model,
            contents=contents,
            config=generate_content_config,
        )

        logger.debug("Starting to process response stream...")
        image_saved = False
        for chunk in response_stream:
            if not chunk.candidates or not chunk.candidates[0].content or not chunk.candidates[0].content.parts:
                logger.warning("Chunk has no candidates, content, or parts. Skipping.")
                continue

            # Inspect every part, not just the first: the image may follow a
            # text part within the same chunk.
            chunk_has_image = False
            for part in chunk.candidates[0].content.parts:
                inline_data = part.inline_data
                if inline_data and inline_data.data:
                    save_binary_file(temp_path, inline_data.data)
                    logger.info(f"File of mime type {inline_data.mime_type} saved to: {temp_path} and prompt input :{text}")
                    chunk_has_image = True
                    image_saved = True

            if not chunk_has_image:
                logger.info(f"Received text: {chunk.text}")
                print(chunk.text)  # Keep the print for immediate console output

            # Log the raw chunk for deeper inspection
            logger.debug(f"Raw chunk: {chunk}")

        if not image_saved:
            # Returning the path of an empty file would only fail later when
            # the caller tries to open it as an image.
            logger.error("Model response contained no image data.")
            return None

        result_path = temp_path
        return result_path

    except Exception:
        # Callers rely on None to signal failure, so log the traceback here
        # rather than propagating.
        logger.exception("An error occurred during generation:")
        return None

    finally:
        # Drop the reserved temp file unless it is being handed to the caller.
        if temp_path and result_path is None:
            try:
                os.remove(temp_path)
            except OSError:
                logger.warning(f"Could not remove temporary file: {temp_path}")

        # Best-effort removal of the uploaded input from the Files API.
        if client is not None and uploaded is not None:
            try:
                client.files.delete(name=uploaded.name)
                logger.debug("Uploaded files deleted.")
            except Exception:
                logger.warning("Could not delete uploaded file.", exc_info=True)
111
+
112
+
113
def process_image_and_prompt(composite_pil, prompt, gemini_api_key):
    """Edit an image with Gemini according to ``prompt``.

    Args:
        composite_pil: Input PIL image, or ``None`` when nothing was uploaded.
        prompt: Text describing the desired edit.
        gemini_api_key: Gemini API key, passed through to ``generate``.

    Returns:
        A one-element list containing the edited PIL image (RGBA results are
        converted to RGB), or an empty list when there is no input image or
        the edit fails.
    """
    logger.debug(f"Starting process_image_and_prompt with prompt: '{prompt}'")

    if composite_pil is None:
        logger.error("No input image provided.")
        return []

    composite_path = None
    edited_image_path = None
    try:
        # Write the input image to a temp PNG so it can be uploaded by path.
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
            composite_path = tmp.name
            composite_pil.save(tmp, format="PNG")
        logger.debug(f"Composite image saved to: {composite_path}")

        edited_image_path = generate(
            text=prompt,
            file_name=composite_path,
            api_key=gemini_api_key,
            model="gemini-2.0-flash-exp-image-generation",
        )

        if not edited_image_path:
            logger.error("generate function returned None.")
            return []  # Return empty when error

        logger.debug(f"Image generated at path: {edited_image_path}")
        # Image.open reads lazily; convert()/copy() pull the pixels into
        # memory so the temp file can be closed and deleted below.
        with Image.open(edited_image_path) as opened:
            if opened.mode == "RGBA":
                result_img = opened.convert("RGB")
            else:
                result_img = opened.copy()
        return [result_img]

    except Exception:
        logger.exception("Error occurred in process_image_and_prompt")
        return []  # Return empty when error

    finally:
        # Neither temp file is needed once the result is in memory.
        for path in (composite_path, edited_image_path):
            if not path:
                continue
            try:
                os.remove(path)
            except OSError:
                logger.warning(f"Could not remove temporary file: {path}")
140
+
141
+
142
# --- Gradio Interface ---
# Two-column layout: image, API key and prompt inputs on the left, the result
# gallery on the right.
with gr.Blocks() as demo:
    # NOTE(review): the <h1> below is empty in the original; confirm whether a
    # title was intended.
    gr.HTML(
        """
        <div style='display: flex; align-items: center; justify-content: center; gap: 20px'>
        <div style="background-color: var(--block-background-fill); border-radius: 8px">
        <img src="https://www.gstatic.com/lamda/images/gemini_favicon_f069958c85030456e93de685481c559f160ea06b.png" style="width: 100px; height: 100px;">
        </div>
        <div>
        <h1></h1>
        <p>ပုံရိပ်တည်းဖြတ်ရန် Gemini</p>
        <p>API Key ကို <a href="https://aistudio.google.com/apikey">ဤနေရာ</a> တွင် ဖန်တီးပါ</p>
        </div>
        </div>
        """
    )
    gr.Markdown("ပုံတစ်ပုံ တင်ပြီး ပုံကိုတည်းဖြတ်ရန် သင်လိုချင်တာကို ရိုက်ထည့်ပါ။")

    with gr.Row():
        with gr.Column():
            image_input = gr.Image(type="pil", label="ပုံတင်ရန်", image_mode="RGBA")
            gemini_api_key = gr.Textbox(
                lines=1,
                placeholder="Gemini API Key ထည့်ပါ",
                label="Gemini API Key",
                type="password"
            )
            prompt_input = gr.Textbox(
                lines=2,
                placeholder="သင်လိုချင်တာကို ဤနေရာတွင် ရိုက်ထည့်ပါ...",
                label="သင်လိုချင်တာ"
            )
            submit_btn = gr.Button("ထုတ်လုပ်ပါ")
        with gr.Column():
            output_gallery = gr.Gallery(label="ထုတ်လုပ်ပြီးရလဒ်များ")

    # Clicking the button runs the edit and shows the result in the gallery.
    submit_btn.click(
        fn=process_image_and_prompt,
        inputs=[image_input, prompt_input, gemini_api_key],
        outputs=output_gallery,
    )


# --- Test Code ---
def _run_smoke_test():
    """Call process_image_and_prompt once with a dummy image and log the outcome.

    This performs a real Gemini request using the ``GEMINI_API_KEY``
    environment variable, so it is only run on explicit request.
    """
    # Create a dummy image (replace with your actual image if needed)
    dummy_image = Image.new("RGBA", (100, 100), color="red")
    dummy_prompt = "Make the image blue"
    dummy_api_key = os.environ.get("GEMINI_API_KEY")

    # Call the function directly
    logger.info("Calling process_image_and_prompt directly...")
    result = process_image_and_prompt(dummy_image, dummy_prompt, dummy_api_key)

    if result:
        logger.info(f"Direct call successful. Result: {result}")
    else:
        logger.error("Direct call failed.")


if __name__ == "__main__":
    # The smoke test is opt-in so that importing or starting the app does not
    # issue an API call every time.
    if os.environ.get("RUN_SMOKE_TEST") == "1":
        _run_smoke_test()
    # Start the web UI; without this the script would build the interface and
    # exit without serving it.
    demo.launch()
199
+