Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,22 +1,40 @@
|
|
1 |
-
import spaces
|
2 |
-
import argparse
|
3 |
import os
|
4 |
import time
|
5 |
from os import path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
from safetensors.torch import load_file
|
7 |
from huggingface_hub import hf_hub_download
|
8 |
|
9 |
-
|
10 |
-
os.environ["TRANSFORMERS_CACHE"] = cache_path
|
11 |
-
os.environ["HF_HUB_CACHE"] = cache_path
|
12 |
-
os.environ["HF_HOME"] = cache_path
|
13 |
-
|
14 |
import gradio as gr
|
15 |
-
import torch
|
16 |
from diffusers import FluxPipeline
|
17 |
|
18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
class timer:
|
21 |
def __init__(self, method_name="timed process"):
|
22 |
self.method = method_name
|
@@ -27,83 +45,234 @@ class timer:
|
|
27 |
end = time.time()
|
28 |
print(f"{self.method} took {str(round(end - self.start, 2))}s")
|
29 |
|
30 |
-
|
31 |
-
|
|
|
|
|
|
|
|
|
32 |
|
33 |
-
|
34 |
-
pipe.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
pipe.fuse_lora(lora_scale=0.125)
|
|
|
|
|
36 |
pipe.to(device="cuda", dtype=torch.bfloat16)
|
37 |
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
)
|
47 |
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
74 |
gr.Markdown(
|
75 |
"""
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
"""
|
86 |
)
|
87 |
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
101 |
|
|
|
102 |
generate_btn.click(
|
103 |
-
|
104 |
-
inputs=[height, width, steps,
|
105 |
-
outputs=
|
|
|
|
|
|
|
|
|
|
|
|
|
106 |
)
|
107 |
|
|
|
108 |
if __name__ == "__main__":
|
109 |
-
demo.launch()
|
|
|
|
|
|
|
1 |
import os
|
2 |
import time
|
3 |
from os import path
|
4 |
+
import tempfile
|
5 |
+
import uuid
|
6 |
+
import base64
|
7 |
+
import mimetypes
|
8 |
+
import json
|
9 |
+
|
10 |
+
import torch
|
11 |
from safetensors.torch import load_file
|
12 |
from huggingface_hub import hf_hub_download
|
13 |
|
14 |
+
# Diffusers ๊ด๋ จ ๋ผ์ด๋ธ๋ฌ๋ฆฌ
|
|
|
|
|
|
|
|
|
15 |
import gradio as gr
|
|
|
16 |
from diffusers import FluxPipeline
|
17 |
|
18 |
+
# Google GenAI ๋ผ์ด๋ธ๋ฌ๋ฆฌ
|
19 |
+
from google import genai
|
20 |
+
from google.genai import types
|
21 |
+
|
22 |
+
#######################################
|
23 |
+
# 0. ํ๊ฒฝ์ค์
|
24 |
+
#######################################
|
25 |
+
|
26 |
+
# ๋ชจ๋ธ ์บ์ ๋๋ ํ ๋ฆฌ ์ค์
|
27 |
+
BASE_DIR = path.dirname(path.abspath(__file__)) if "__file__" in globals() else os.getcwd()
|
28 |
+
CACHE_PATH = path.join(BASE_DIR, "models")
|
29 |
|
30 |
+
os.environ["TRANSFORMERS_CACHE"] = CACHE_PATH
|
31 |
+
os.environ["HF_HUB_CACHE"] = CACHE_PATH
|
32 |
+
os.environ["HF_HOME"] = CACHE_PATH
|
33 |
+
|
34 |
+
# Google GenAI ์ฌ์ฉ์ ์ํด์๋ ๋ค์๊ณผ ๊ฐ์ ํ๊ฒฝ ๋ณ์๊ฐ ํ์ํฉ๋๋ค.
|
35 |
+
# os.environ["GAPI_TOKEN"] = "<YOUR_GOOGLE_GENAI_API_KEY>"
|
36 |
+
|
37 |
+
# ์์
์๊ฐ ์ธก์ ์ ์ํ ๊ฐ๋จํ ํ์ด๋จธ ํด๋์ค
|
38 |
class timer:
|
39 |
def __init__(self, method_name="timed process"):
|
40 |
self.method = method_name
|
|
|
45 |
end = time.time()
|
46 |
print(f"{self.method} took {str(round(end - self.start, 2))}s")
|
47 |
|
48 |
+
#######################################
|
49 |
+
# 1. FLUX ํ์ดํ๋ผ์ธ ๋ก๋
|
50 |
+
#######################################
|
51 |
+
|
52 |
+
if not path.exists(CACHE_PATH):
|
53 |
+
os.makedirs(CACHE_PATH, exist_ok=True)
|
54 |
|
55 |
+
# FLUX ํ์ดํ๋ผ์ธ ๋ก๋
|
56 |
+
pipe = FluxPipeline.from_pretrained(
|
57 |
+
"black-forest-labs/FLUX.1-dev",
|
58 |
+
torch_dtype=torch.bfloat16
|
59 |
+
)
|
60 |
+
|
61 |
+
# LoRA ๊ฐ์ค์น ๋ก๋
|
62 |
+
lora_path = hf_hub_download("ByteDance/Hyper-SD", "Hyper-FLUX.1-dev-8steps-lora.safetensors")
|
63 |
+
pipe.load_lora_weights(lora_path)
|
64 |
pipe.fuse_lora(lora_scale=0.125)
|
65 |
+
|
66 |
+
# GPU๋ก ์ฎ๊ธฐ๊ธฐ
|
67 |
pipe.to(device="cuda", dtype=torch.bfloat16)
|
68 |
|
69 |
+
#######################################
|
70 |
+
# 2. Google GenAI๋ฅผ ํตํ ์ด๋ฏธ์ง ๋ด ํ
์คํธ ๋ณํ ํจ์
|
71 |
+
#######################################
|
72 |
+
|
73 |
+
def save_binary_file(file_name, data):
|
74 |
+
"""Google GenAI์์ ์๋ต๋ฐ์ ์ด์ง ๋ฐ์ดํฐ๋ฅผ ์ด๋ฏธ์ง ํ์ผ๋ก ์ ์ฅ"""
|
75 |
+
with open(file_name, "wb") as f:
|
76 |
+
f.write(data)
|
77 |
+
|
78 |
+
def generate_by_google_genai(text, file_name, model="gemini-2.0-flash-exp"):
|
79 |
+
"""
|
80 |
+
Google GenAI(gemini) ๋ชจ๋ธ์ ํตํด ์ด๋ฏธ์ง/ํ
์คํธ๋ฅผ ์์ฑํ๊ฑฐ๋ ๋ณํ.
|
81 |
+
- text: ๋ณ๊ฒฝํ ํ
์คํธ๋ ๋ช
๋ น์ด ๋ฑ ํ๋กฌํํธ
|
82 |
+
- file_name: ์๋ณธ ์ด๋ฏธ์ง(์: .png) ๊ฒฝ๋ก
|
83 |
+
- model: ์ฌ์ฉํ gemini ๋ชจ๋ธ ์ด๋ฆ
|
84 |
+
"""
|
85 |
+
# 1) Google Client ์ด๊ธฐํ
|
86 |
+
client = genai.Client(api_key=os.getenv("GAPI_TOKEN"))
|
87 |
+
|
88 |
+
# 2) ์ด๋ฏธ์ง ์
๋ก๋
|
89 |
+
files = [client.files.upload(file=file_name)]
|
90 |
+
|
91 |
+
# 3) gemini์ ์ ๋ฌํ Content ์ค๋น (์ด๋ฏธ์ง + ํ๋กฌํํธ)
|
92 |
+
contents = [
|
93 |
+
types.Content(
|
94 |
+
role="user",
|
95 |
+
parts=[
|
96 |
+
types.Part.from_uri(
|
97 |
+
file_uri=files[0].uri,
|
98 |
+
mime_type=files[0].mime_type,
|
99 |
+
),
|
100 |
+
types.Part.from_text(text=text),
|
101 |
+
],
|
102 |
+
),
|
103 |
+
]
|
104 |
+
|
105 |
+
# 4) ์์ฑ/๋ณํ ์ค์
|
106 |
+
generate_content_config = types.GenerateContentConfig(
|
107 |
+
temperature=1,
|
108 |
+
top_p=0.95,
|
109 |
+
top_k=40,
|
110 |
+
max_output_tokens=8192,
|
111 |
+
response_modalities=["image", "text"],
|
112 |
+
response_mime_type="text/plain",
|
113 |
)
|
114 |
|
115 |
+
text_response = ""
|
116 |
+
image_path = None
|
117 |
+
|
118 |
+
# ์์ ํ์ผ๋ก ์ด๋ฏธ์ง ๋ฐ์ ์ค๋น
|
119 |
+
with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
|
120 |
+
temp_path = tmp.name
|
121 |
+
# 5) ์คํธ๋ฆผ ํํ๋ก ์๋ต ๋ฐ์์ ์ด๋ฏธ์ง/ํ
์คํธ ๊ตฌ๋ถ ์ฒ๋ฆฌ
|
122 |
+
for chunk in client.models.generate_content_stream(
|
123 |
+
model=model,
|
124 |
+
contents=contents,
|
125 |
+
config=generate_content_config,
|
126 |
+
):
|
127 |
+
if not chunk.candidates or not chunk.candidates[0].content or not chunk.candidates[0].content.parts:
|
128 |
+
continue
|
129 |
+
candidate = chunk.candidates[0].content.parts[0]
|
130 |
+
|
131 |
+
# inline_data๊ฐ ์์ผ๋ฉด ์ด๋ฏธ์ง ์๋ต
|
132 |
+
if candidate.inline_data:
|
133 |
+
save_binary_file(temp_path, candidate.inline_data.data)
|
134 |
+
print(f"File of mime type {candidate.inline_data.mime_type} saved to: {temp_path}")
|
135 |
+
image_path = temp_path
|
136 |
+
break # ์ด๋ฏธ์ง๊ฐ ์ค๋ฉด ์ฐ์ ๋ฉ์ถค
|
137 |
+
else:
|
138 |
+
# ์์ผ๋ฉด ํ
์คํธ๋ฅผ ๋์
|
139 |
+
text_response += chunk.text + "\n"
|
140 |
|
141 |
+
# ์
๋ก๋ ํ์ผ(google.genai.files.File) ๊ฐ์ฒด ์ ๊ฑฐ
|
142 |
+
del files
|
143 |
+
|
144 |
+
return image_path, text_response
|
145 |
+
|
146 |
+
#######################################
|
147 |
+
# 3. Gradio ํจ์: (1) FLUX๋ก ์ด๋ฏธ์ง ์์ฑ -> (2) Google GenAI๋ก ํ
์คํธ ๊ต์ฒด
|
148 |
+
#######################################
|
149 |
+
|
150 |
+
def generate_initial_image(prompt, text, height, width, steps, scale, seed):
|
151 |
+
"""
|
152 |
+
FLUX ํ์ดํ๋ผ์ธ์ ์ฌ์ฉํด 'ํ
์คํธ๊ฐ ํฌํจ๋ ์ด๋ฏธ์ง๋ฅผ' ๋จผ์ ์์ฑํ๋ ํจ์.
|
153 |
+
prompt: ์ด๋ฏธ์ง ๋ฐฐ๊ฒฝ/์ฅ๋ฉด/์คํ์ผ ๋ฌ์ฌ๋ฅผ ์ํ ํ๋กฌํํธ
|
154 |
+
text: ์ค์ ๋ก ์ด๋ฏธ์ง์ ๋ค์ด๊ฐ์ผ ํ ๋ฌธ๊ตฌ(์: "์๋
ํ์ธ์", "Hello world" ๋ฑ)
|
155 |
+
"""
|
156 |
+
# ์ด๋ฏธ์ง์ ํ
์คํธ๋ฅผ ํฌํจ์ํค๋ ค๋ฉด ํ๋กฌํํธ์ ์ง์ ๋ฌธ๊ตฌ ์์ฒญ์ ๋ฃ๋ ๊ฒ์ด ์ค์.
|
157 |
+
# Diffusion ๋ชจ๋ธ์ ๋ฐ๋ผ ์ ๋ฐ์๋์ง ์์ ์๋ ์์ผ๋, ๊ตฌ์ฒด์ ์ผ๋ก ๊ธฐ์ฌํ ์๋ก ์ ๋ฆฌ.
|
158 |
+
# ์: "A poster with large bold Korean text that says '์๋
ํ์ธ์' in red color ..."
|
159 |
+
# ์ฌ๊ธฐ์๋ ๊ฐ๋จํ prompt ๋ค์ ํ
์คํธ ์ฝ์
์์๋ฅผ ๋ณด์ฌ์ค
|
160 |
+
combined_prompt = f"{prompt} with clear readable text that says '{text}'"
|
161 |
+
|
162 |
+
with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16), timer("inference"):
|
163 |
+
result = pipe(
|
164 |
+
prompt=[combined_prompt],
|
165 |
+
generator=torch.Generator().manual_seed(int(seed)),
|
166 |
+
num_inference_steps=int(steps),
|
167 |
+
guidance_scale=float(scale),
|
168 |
+
height=int(height),
|
169 |
+
width=int(width),
|
170 |
+
max_sequence_length=256
|
171 |
+
).images[0]
|
172 |
+
|
173 |
+
return result
|
174 |
+
|
175 |
+
def change_text_in_image(original_image, new_text):
|
176 |
+
"""
|
177 |
+
Google GenAI์ gemini ๋ชจ๋ธ์ ํตํด,
|
178 |
+
์
๋ก๋๋ ์ด๋ฏธ์ง ๋ด๋ถ์ ๋ฌธ๊ตฌ๋ฅผ `new_text`๋ก ๋ณ๊ฒฝํด์ฃผ๋ ํจ์.
|
179 |
+
"""
|
180 |
+
try:
|
181 |
+
# ์์ ํ์ผ์ ๋จผ์ ์ ์ฅ
|
182 |
+
with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
|
183 |
+
original_path = tmp.name
|
184 |
+
original_image.save(original_path)
|
185 |
+
|
186 |
+
# Gemini ๋ชจ๋ธ ํธ์ถ
|
187 |
+
image_path, text_response = generate_by_google_genai(
|
188 |
+
text=f"Change the text in this image to: '{new_text}'",
|
189 |
+
file_name=original_path
|
190 |
+
)
|
191 |
+
|
192 |
+
# ๊ฒฐ๊ณผ๊ฐ ์ด๋ฏธ์ง๋ก ์๋ค๋ฉด
|
193 |
+
if image_path:
|
194 |
+
modified_img = gr.processing_utils.decode_base64_to_image(
|
195 |
+
base64.b64encode(open(image_path, "rb").read())
|
196 |
+
)
|
197 |
+
return modified_img, "" # (๊ฒฐ๊ณผ ์ด๋ฏธ์ง, ๋น ํ
์คํธ)
|
198 |
+
else:
|
199 |
+
# ์ด๋ฏธ์ง๊ฐ ์์ด ํ
์คํธ๋ง ์๋ต์ผ๋ก ์จ ๊ฒฝ์ฐ
|
200 |
+
return None, text_response
|
201 |
+
|
202 |
+
except Exception as e:
|
203 |
+
raise gr.Error(f"Error: {e}")
|
204 |
+
|
205 |
+
|
206 |
+
#######################################
|
207 |
+
# 4. Gradio ์ธํฐํ์ด์ค ๊ตฌ์ฑ
|
208 |
+
#######################################
|
209 |
+
|
210 |
+
with gr.Blocks(title="Flux + Google GenAI Text Replacement") as demo:
|
211 |
gr.Markdown(
|
212 |
"""
|
213 |
+
# Flux ๊ธฐ๋ฐ ์ด๋ฏธ์ง ์์ฑ + Google GenAI๋ฅผ ํตํ ํ
์คํธ ๋ณํ
|
214 |
+
**์ด ๋ฐ๋ชจ๋ ์๋ ๋ ๋จ๊ณ๋ฅผ ๋ณด์ฌ์ค๋๋ค.**
|
215 |
+
|
216 |
+
1) **Diffusion ๋ชจ๋ธ(FluxPipeline)์ ์ด์ฉํด** ์ด๋ฏธ์ง ์์ฑ.
|
217 |
+
- ์ด๋, ์ฌ์ฉ์๊ฐ ์ง์ ํ ํ
์คํธ๋ฅผ ์ด๋ฏธ์ง ์์ ํ์ํ๋๋ก ์๋ํฉ๋๋ค.
|
218 |
+
2) **์์ฑ๋ ์ด๋ฏธ์ง๋ฅผ Google GenAI(gemini) ๋ชจ๋ธ์ ์ ๋ฌ**ํ์ฌ,
|
219 |
+
- ์ด๋ฏธ์ง ๋ด ํ
์คํธ ๋ถ๋ถ๋ง ๋ค๋ฅธ ๋ฌธ์์ด๋ก ๋ณ๊ฒฝ.
|
220 |
+
|
221 |
+
---
|
222 |
"""
|
223 |
)
|
224 |
|
225 |
+
with gr.Row():
|
226 |
+
with gr.Column():
|
227 |
+
gr.Markdown("## 1) Step 1: FLUX๋ก ํ
์คํธ ํฌํจ ์ด๋ฏธ์ง ์์ฑ")
|
228 |
+
prompt_input = gr.Textbox(
|
229 |
+
lines=3,
|
230 |
+
label="์ด๋ฏธ์ง ์ฅ๋ฉด/๋ฐฐ๊ฒฝ Prompt",
|
231 |
+
placeholder="์) A poster with futuristic neon style..."
|
232 |
+
)
|
233 |
+
text_input = gr.Textbox(
|
234 |
+
lines=1,
|
235 |
+
label="์ด๋ฏธ์ง ์์ ๋ค์ด๊ฐ ํ
์คํธ",
|
236 |
+
placeholder="์) ์๋
ํ์ธ์"
|
237 |
+
)
|
238 |
+
with gr.Accordion("๊ณ ๊ธ ์ค์ (ํ์ฅ)", open=False):
|
239 |
+
height = gr.Slider(label="Height", minimum=256, maximum=1152, step=64, value=512)
|
240 |
+
width = gr.Slider(label="Width", minimum=256, maximum=1152, step=64, value=512)
|
241 |
+
steps = gr.Slider(label="Inference Steps", minimum=6, maximum=25, step=1, value=8)
|
242 |
+
scale = gr.Slider(label="Guidance Scale", minimum=0.0, maximum=5.0, step=0.1, value=3.5)
|
243 |
+
seed = gr.Number(label="Seed (reproducibility)", value=1234, precision=0)
|
244 |
+
|
245 |
+
generate_btn = gr.Button("Generate Base Image", variant="primary")
|
246 |
+
|
247 |
+
# ์์ฑ ๊ฒฐ๊ณผ ํ์
|
248 |
+
generated_image = gr.Image(
|
249 |
+
label="Generated Image (with text)",
|
250 |
+
type="pil"
|
251 |
+
)
|
252 |
+
|
253 |
+
with gr.Column():
|
254 |
+
gr.Markdown("## 2) Step 2: ์์ฑ๋ ์ด๋ฏธ์ง ๋ด ํ
์คํธ ์์ ")
|
255 |
+
new_text_input = gr.Textbox(
|
256 |
+
label="์๋ก ๋ฐ๊ฟ ํ
์คํธ",
|
257 |
+
placeholder="์) Hello world"
|
258 |
+
)
|
259 |
+
modify_btn = gr.Button("Change Text in Image via Gemini", variant="secondary")
|
260 |
+
output_img = gr.Image(label="Modified Image", type="pil")
|
261 |
+
output_txt = gr.Textbox(label="(If only text returned)")
|
262 |
|
263 |
+
# ๋ฒํผ ์ก์
์ฐ๊ฒฐ
|
264 |
generate_btn.click(
|
265 |
+
fn=generate_initial_image,
|
266 |
+
inputs=[prompt_input, text_input, height, width, steps, scale, seed],
|
267 |
+
outputs=[generated_image]
|
268 |
+
)
|
269 |
+
|
270 |
+
modify_btn.click(
|
271 |
+
fn=change_text_in_image,
|
272 |
+
inputs=[generated_image, new_text_input],
|
273 |
+
outputs=[output_img, output_txt]
|
274 |
)
|
275 |
|
276 |
+
# ์ค์ ์คํ ์์๋ ์๋์ ๊ฐ์ด demo.launch()๋ฅผ ํธ์ถํฉ๋๋ค.
|
277 |
if __name__ == "__main__":
|
278 |
+
demo.queue(concurrency_count=1, max_size=20).launch()
|