Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -1,22 +1,32 @@
-
-
-
-
  import numpy as np
- import
  from PIL import Image
  from diffusers import StableDiffusionXLPipeline
  import open_clip
  from huggingface_hub import hf_hub_download
  from IP_Composer.IP_Adapter.ip_adapter import IPAdapterXL
- from IP_Composer.perform_swap import compute_dataset_embeds_svd, get_modified_images_embeds_composition
-
  import spaces
- import random

  device = "cuda" if torch.cuda.is_available() else "cpu"

- #
  base_model_path = "stabilityai/stable-diffusion-xl-base-1.0"
  pipe = StableDiffusionXLPipeline.from_pretrained(
      base_model_path,
@@ -24,19 +34,35 @@ pipe = StableDiffusionXLPipeline.from_pretrained(
      add_watermarker=False,
  )

- #
-
  image_encoder_subfolder = 'models/image_encoder'
- ip_ckpt = hf_hub_download(
-     'h94/IP-Adapter', subfolder="sdxl_models", filename='ip-adapter_sdxl_vit-h.bin')

- #
- clip_model, _, preprocess = open_clip.create_model_and_transforms('hf-hub:laion/CLIP-ViT-H-14-laion2B-s32B-b79K')
  clip_model.to(device)
- tokenizer = open_clip.get_tokenizer(
-     'hf-hub:laion/CLIP-ViT-H-14-laion2B-s32B-b79K')

- CONCEPTS_MAP={
      "age": "age_descriptions.npy",
      "animal fur": "fur_descriptions.npy",
      "dogs": "dog_descriptions.npy",
@@ -53,116 +79,87 @@ CONCEPTS_MAP={
      "season": "season_descriptions.npy",
      "material": "material_descriptions_with_gems.npy"
  }
- RANKS_MAP={
-     "age": 30,
-     "animal fur": 80,
-     "dogs": 30,
-     "emotions": 30,
-     "flowers": 30,
-     "fruit/vegtable": 30,
-     "outfit type": 30,
-     "outfit pattern (including color)": 80,
-     "patterns": 80,
-     "patterns (including color)": 80,
-     "vehicle": 30,
-     "daytime": 30,
-     "pose": 30,
-     "season": 30,
-     "material": 80,
  }
  concept_options = list(CONCEPTS_MAP.keys())

-
  examples = [
-     ['./IP_Composer/assets/patterns/base.jpg',
-
-
-
-     ['./IP_Composer/assets/
-
-
-
-     ['./IP_Composer/assets/
-
  ]

  def generate_examples(base_image,
                        concept_image1, concept_name1,
                        concept_image2, concept_name2,
                        concept_image3, concept_name3,
                        rank1, rank2, rank3,
                        prompt, scale, seed, num_inference_steps):
-     return process_and_display(base_image,
-                                concept_image1, concept_name1,
-                                concept_image2, concept_name2,
-                                concept_image3, concept_name3,
-                                rank1, rank2, rank3,
-                                prompt, scale, seed, num_inference_steps)
-

-
-
  MAX_SEED = np.iinfo(np.int32).max
  def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
-     if randomize_seed:
-         seed = random.randint(0, MAX_SEED)
-     return seed

- def change_rank_default(concept_name):
      return RANKS_MAP.get(concept_name, 30)

  @spaces.GPU
  def match_image_to_concept(image):
-     """
-     Match an uploaded image to the closest concept type using CLIP embeddings
-     """
      if image is None:
          return None
-
-     # Get image embeddings
-     img_pil = Image.fromarray(image).convert("RGB")
      img_embed = get_image_embeds(img_pil, clip_model, preprocess, device)
-
-     # Calculate similarity to each concept
      similarities = {}
      for concept_name, concept_file in CONCEPTS_MAP.items():
          try:
-             # Load concept embeddings
              embeds_path = f"./IP_Composer/text_embeddings/{concept_file}"
              with open(embeds_path, "rb") as f:
                  concept_embeds = np.load(f)
-
-             # Calculate similarity to each text embedding
              sim_scores = []
              for embed in concept_embeds:
-                 img_embed_norm = img_embed / np.linalg.norm(img_embed)
-                 text_embed_norm = embed / np.linalg.norm(embed)
-
-
-                 # Calculate cosine similarity
-                 similarity = np.dot(img_embed_norm.flatten(), text_embed_norm.flatten())
-                 sim_scores.append(similarity)
-
-             # Use the average of top 5 similarities for better matching
              sim_scores.sort(reverse=True)
-             top_similarities = sim_scores[:5]
-             avg_similarity = sum(top_similarities) / len(top_similarities)
-
-             similarities[concept_name] = avg_similarity
          except Exception as e:
-             print(f"
-
-     # Return the concept with highest similarity
      if similarities:
-         matched_concept = max(similarities, key=similarities.get)
-
-
-         return matched_concept
      return None

  @spaces.GPU
  def get_image_embeds(pil_image, model=clip_model, preproc=preprocess, dev=device):
-     """Get CLIP image embeddings for a given PIL image"""
      image = preproc(pil_image)[np.newaxis, :, :, :]
      with torch.no_grad():
          embeds = model.encode_image(image.to(dev))
@@ -175,114 +172,72 @@ def process_images(
      concept_image2=None, concept_name2=None,
      concept_image3=None, concept_name3=None,
      rank1=10, rank2=10, rank3=10,
-     prompt=None,
-     scale=1.0,
-     seed=420,
-     num_inference_steps=50,
-     concpet_from_file_1 = None,
-     concpet_from_file_2 = None,
-     concpet_from_file_3 = None,
-     use_concpet_from_file_1 = False,
-     use_concpet_from_file_2 = False,
-     use_concpet_from_file_3 = False
  ):
-
-
-
-
-
-
-
-
-
-     skip_load_concept =[False,False, False]
-
-     # for demo purposes we allow for up to 3 different concepts and corresponding concept images
-     if concept_image1 is not None:
-         concept_images.append(concept_image1)
-         if use_concpet_from_file_1 and concpet_from_file_1 is not None: # if concept is new from user input
-             concept_descriptions.append(concpet_from_file_1)
-             skip_load_concept[0] = True
-         else:
-             concept_descriptions.append(CONCEPTS_MAP[concept_name1])
-     else:
          return None, "Please upload at least one concept image"
-
-
      if concept_image2 is not None:
          concept_images.append(concept_image2)
-         if use_concpet_from_file_2 and concpet_from_file_2 is not None:
-             concept_descriptions.append(concpet_from_file_2)
-             skip_load_concept[1] = True
          else:
-             concept_descriptions.append(CONCEPTS_MAP[concept_name2])
-
-
      if concept_image3 is not None:
          concept_images.append(concept_image3)
-         if use_concpet_from_file_3 and concpet_from_file_3 is not None:
-             concept_descriptions.append(concpet_from_file_3)
-             skip_load_concept[2] = True
          else:
-             concept_descriptions.append(CONCEPTS_MAP[concept_name3])
-
-     # Get all ranks
-     ranks = [rank1]
-     if concept_image2 is not None:
-         ranks.append(rank2)
-     if concept_image3 is not None:
          ranks.append(rank3)
-
-
-
-     projection_matrices = []
-     # for the demo, we assume 1 concept image per concept
-     # for each concept image, we calculate it's image embeedings and load the concepts textual embeddings to copmpute the projection matrix over it
-     for i, concept in enumerate(concept_descriptions):
          img_pil = Image.fromarray(concept_images[i]).convert("RGB")
          concept_embeds.append(get_image_embeds(img_pil, clip_model, preprocess, device))
-         if skip_load_concept[i]:
-             all_embeds_in = concept
          else:
-             embeds_path = f"./IP_Composer/text_embeddings/{concept}"
-             with open(embeds_path, "rb") as f:
-                 all_embeds_in = np.load(f)
-
-         projection_matrix = compute_dataset_embeds_svd(all_embeds_in, ranks[i])
-         projection_matrices.append(projection_matrix)
-
-
-     # Create projection data structure for the composition
      projections_data = [
-         {
-             "embed": embed,
-             "projection_matrix": proj_matrix
-         }
-         for embed, proj_matrix in zip(concept_embeds, projection_matrices)
      ]
-
-     # Generate modified images -
      modified_images = get_modified_images_embeds_composition(
-         base_embed,
-         projections_data,
-         ip_model,
-         prompt=prompt,
-         scale=scale,
-         num_samples=1,
-         seed=seed,
-         num_inference_steps=num_inference_steps
      )
-
      return modified_images[0]

  @spaces.GPU
  def get_text_embeddings(concept_file):
-     print("generating text embeddings")
      descriptions = load_descriptions(concept_file)
-     embeddings = generate_embeddings(descriptions, clip_model, tokenizer, device, batch_size=100)
-
      return embeddings, True
-

  def process_and_display(
      base_image,
@@ -291,235 +246,184 @@ def process_and_display(
      concept_image3=None, concept_name3=None,
      rank1=30, rank2=30, rank3=30,
      prompt=None, scale=1.0, seed=0, num_inference_steps=50,
-     concpet_from_file_1 = None,
-     concpet_from_file_2 = None,
-     concpet_from_file_3 = None,
-     use_concpet_from_file_1 = False,
-     use_concpet_from_file_2 = False,
-     use_concpet_from_file_3 = False
  ):
      if base_image is None:
          raise gr.Error("Please upload a base image")
-
      if concept_image1 is None:
          raise gr.Error("Choose at least one concept image")

-
-
-
-     modified_images = process_images(
-         base_image,
-         concept_image1, concept_name1,
          concept_image2, concept_name2,
          concept_image3, concept_name3,
-         rank1, rank2, rank3,
          prompt, scale, seed, num_inference_steps,
-         concpet_from_file_1,
-         concpet_from_file_2,
-         concpet_from_file_3,
-         use_concpet_from_file_1,
-         use_concpet_from_file_2,
-         use_concpet_from_file_3
      )
-
-     return modified_images

- #
  css = """
-
-
-
  }
-
-
-
  }
  """
- example = """
- Emotion Description
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-     concpet_from_file_1 = gr.State()
-     concpet_from_file_2 = gr.State()
-     concpet_from_file_3 = gr.State()
-     use_concpet_from_file_1 = gr.State()
-     use_concpet_from_file_2 = gr.State()
-     use_concpet_from_file_3 = gr.State()
-     with gr.Row():
-         with gr.Column():
-             base_image = gr.Image(label="Base Image (Required)", type="numpy", height=400, width=400)
-             with gr.Tab("Concept 1"):
-                 with gr.Group():
-                     concept_image1 = gr.Image(label="Concept Image 1", type="numpy", height=400, width=400)
-                     with gr.Column():
-                         concept_name1 = gr.Dropdown(concept_options, label="Concept 1", value=None, info="Pick concept type")
-                         with gr.Accordion("💡 Or use a new concept 👇", open=False):
-                             gr.Markdown("1. Upload a file with text variations of your concept (e.g. ask an LLM)")
-                             gr.Markdown("2. Prefereably with > 100 variations.")
-                             with gr.Accordion("File example for the concept 'emotions'", open=False):
-                                 gr.Markdown(example)
-                             concept_file_1 = gr.File(label="Concept variations", file_types=["text"])
-
-             with gr.Tab("Concept 2 (Optional)"):
-                 with gr.Group():
-                     concept_image2 = gr.Image(label="Concept Image 2", type="numpy", height=400, width=400)
-                     with gr.Column():
-                         concept_name2 = gr.Dropdown(concept_options, label="Concept 2", value=None, info="Pick concept type")
-                         with gr.Accordion("💡 Or use a new concept 👇", open=False):
-                             gr.Markdown("1. Upload a file with text variations of your concept (e.g. ask an LLM)")
-                             gr.Markdown("2. Prefereably with > 100 variations.")
-                             with gr.Accordion("File example for the concept 'emotions'", open=False):
-                                 gr.Markdown(example)
-                             concept_file_2 = gr.File(label="Concept variations", file_types=["text"])
-
-
-             with gr.Tab("Concept 3 (optional)"):
-                 with gr.Group():
-                     concept_image3 = gr.Image(label="Concept Image 3", type="numpy", height=400, width=400)
-                     with gr.Column():
-                         concept_name3 = gr.Dropdown(concept_options, label="Concept 3", value=None, info="Pick concept type")
-                         with gr.Accordion("💡 Or use a new concept 👇", open=False):
-                             gr.Markdown("1. Upload a file with text variations of your concept (e.g. ask an LLM)")
-                             gr.Markdown("2. Prefereably with > 100 variations.")
-                             with gr.Accordion("File example for the concept 'emotions'", open=False):
-                                 gr.Markdown(example)
-                             concept_file_3 = gr.File(label="Concept variations", file_types=["text"])
-
-
-
-             with gr.Accordion("Advanced options", open=False):
-                 prompt = gr.Textbox(label="Guidance Prompt (Optional)", placeholder="Optional text prompt to guide generation")
-                 num_inference_steps = gr.Slider(minimum=1, maximum=50, value=30, step=1, label="Num steps")
-                 with gr.Row():
-                     scale = gr.Slider(minimum=0.1, maximum=2.0, value=1.0, step=0.1, label="Scale")
-                     randomize_seed = gr.Checkbox(value=True, label="Randomize seed")
-                     seed = gr.Number(value=0, label="Seed", precision=0)
-                 with gr.Column():
-                     gr.Markdown("If a concept is not showing enough, try to increase the rank")
-                     with gr.Row():
-                         rank1 = gr.Slider(minimum=1, maximum=150, value=30, step=1, label="Rank concept 1")
-                         rank2 = gr.Slider(minimum=1, maximum=150, value=30, step=1, label="Rank concept 2")
-                         rank3 = gr.Slider(minimum=1, maximum=150, value=30, step=1, label="Rank concept 3")
-
-         with gr.Column():
-             output_image = gr.Image(label="Composed output", show_label=True, height=400, width=400)
-             submit_btn = gr.Button("Generate")
-
-     gr.Examples(
          examples,
-         inputs=[base_image,
-                 concept_image1, concept_name1,
                  concept_image2, concept_name2,
                  concept_image3, concept_name3,
-                 rank1, rank2, rank3,
                  prompt, scale, seed, num_inference_steps],
          outputs=[output_image],
          fn=generate_examples,
          cache_examples=False
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-         concept_image1, concept_name1,
-         concept_image2, concept_name2,
-         concept_image3, concept_name3,
-         rank1, rank2, rank3,
-         prompt, scale, seed, num_inference_steps,
-         concpet_from_file_1,
-         concpet_from_file_2,
-         concpet_from_file_3,
-         use_concpet_from_file_1,
-         use_concpet_from_file_2,
-         use_concpet_from_file_3
-         ],
-         outputs=[output_image]
-     )
-
-     concept_name1.select(
-         fn=change_rank_default,
-         inputs=[concept_name1],
-         outputs=[rank1]
-     )
-     concept_name2.select(
-         fn=change_rank_default,
-         inputs=[concept_name2],
-         outputs=[rank2]
-     )
-     concept_name3.select(
-         fn=change_rank_default,
-         inputs=[concept_name3],
-         outputs=[rank3]
-     )
-     concept_image1.upload(
-         fn=match_image_to_concept,
-         inputs=[concept_image1],
-         outputs=[concept_name1]
-     )
-     concept_image2.upload(
-         fn=match_image_to_concept,
-         inputs=[concept_image2],
-         outputs=[concept_name2]
-     )
-     concept_image3.upload(
-         fn=match_image_to_concept,
-         inputs=[concept_image3],
-         outputs=[concept_name3]
-     )
-
  if __name__ == "__main__":
-     demo.launch()
+ # ===========================================
+ # IP-Composer 🌅✚🖌️ – FULL IMPROVED UI SCRIPT
+ # (same functionality as before; UI, theme, layout, and gallery enhanced)
+ # ===========================================
+
+ import os, json, random, gc
  import numpy as np
+ import torch
  from PIL import Image
+ import gradio as gr
+ from gradio.themes import Soft          # ★ NEW
  from diffusers import StableDiffusionXLPipeline
  import open_clip
  from huggingface_hub import hf_hub_download
  from IP_Composer.IP_Adapter.ip_adapter import IPAdapterXL
+ from IP_Composer.perform_swap import (compute_dataset_embeds_svd,
+                                       get_modified_images_embeds_composition)
+ from IP_Composer.generate_text_embeddings import (load_descriptions,
+                                                   generate_embeddings)
  import spaces

+ # ─────────────────────────────
+ # 1 · Device
+ # ─────────────────────────────
  device = "cuda" if torch.cuda.is_available() else "cpu"

+ # ─────────────────────────────
+ # 2 · Stable-Diffusion XL
+ # ─────────────────────────────
  base_model_path = "stabilityai/stable-diffusion-xl-base-1.0"
  pipe = StableDiffusionXLPipeline.from_pretrained(
      base_model_path,
      add_watermarker=False,
  )

+ # ─────────────────────────────
+ # 3 · IP-Adapter
+ # ─────────────────────────────
+ image_encoder_repo = 'h94/IP-Adapter'
  image_encoder_subfolder = 'models/image_encoder'
+ ip_ckpt = hf_hub_download(
+     'h94/IP-Adapter',
+     subfolder="sdxl_models",
+     filename='ip-adapter_sdxl_vit-h.bin'
+ )
+ ip_model = IPAdapterXL(pipe, image_encoder_repo,
+                        image_encoder_subfolder,
+                        ip_ckpt, device)

+ # ─────────────────────────────
+ # 4 · CLIP
+ # ─────────────────────────────
+ clip_model, _, preprocess = open_clip.create_model_and_transforms(
+     'hf-hub:laion/CLIP-ViT-H-14-laion2B-s32B-b79K'
+ )
  clip_model.to(device)
+ tokenizer = open_clip.get_tokenizer(
+     'hf-hub:laion/CLIP-ViT-H-14-laion2B-s32B-b79K'
+ )

+ # ─────────────────────────────
+ # 5 · Concept maps
+ # ─────────────────────────────
+ CONCEPTS_MAP = {
      "age": "age_descriptions.npy",
      "animal fur": "fur_descriptions.npy",
      "dogs": "dog_descriptions.npy",
      "season": "season_descriptions.npy",
      "material": "material_descriptions_with_gems.npy"
  }
+ RANKS_MAP = {
+     "age": 30, "animal fur": 80, "dogs": 30, "emotions": 30,
+     "flowers": 30, "fruit/vegtable": 30, "outfit type": 30,
+     "outfit pattern (including color)": 80, "patterns": 80,
+     "patterns (including color)": 80, "vehicle": 30,
+     "daytime": 30, "pose": 30, "season": 30, "material": 80
+ }
  concept_options = list(CONCEPTS_MAP.keys())

+ # ─────────────────────────────
+ # 6 · Example tuples (base_img, c1_img, …)
+ # ─────────────────────────────
  examples = [
+     ['./IP_Composer/assets/patterns/base.jpg',
+      './IP_Composer/assets/patterns/pattern.png',
+      'patterns (including color)', None, None, None, None,
+      80, 30, 30, None, 1.0, 0, 30],
+     ['./IP_Composer/assets/flowers/base.png',
+      './IP_Composer/assets/flowers/concept.png',
+      'flowers', None, None, None, None,
+      30, 30, 30, None, 1.0, 0, 30],
+     ['./IP_Composer/assets/materials/base.png',
+      './IP_Composer/assets/materials/concept.jpg',
+      'material', None, None, None, None,
+      80, 30, 30, None, 1.0, 0, 30],
+     # … (more examples can be added here)
  ]

+ # ----------------------------------------------------------
+ # 7 · Utility functions (unchanged except docstring tweaks)
+ # ----------------------------------------------------------
  def generate_examples(base_image,
                        concept_image1, concept_name1,
                        concept_image2, concept_name2,
                        concept_image3, concept_name3,
                        rank1, rank2, rank3,
                        prompt, scale, seed, num_inference_steps):
+     return process_and_display(base_image,
+                                concept_image1, concept_name1,
+                                concept_image2, concept_name2,
+                                concept_image3, concept_name3,
+                                rank1, rank2, rank3,
+                                prompt, scale, seed, num_inference_steps)

  MAX_SEED = np.iinfo(np.int32).max
  def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
+     return random.randint(0, MAX_SEED) if randomize_seed else seed

+ def change_rank_default(concept_name):   # auto-adjust the rank default
      return RANKS_MAP.get(concept_name, 30)

  @spaces.GPU
  def match_image_to_concept(image):
      if image is None:
          return None
+     img_pil = Image.fromarray(image).convert("RGB")
      img_embed = get_image_embeds(img_pil, clip_model, preprocess, device)
+
      similarities = {}
      for concept_name, concept_file in CONCEPTS_MAP.items():
          try:
              embeds_path = f"./IP_Composer/text_embeddings/{concept_file}"
              with open(embeds_path, "rb") as f:
                  concept_embeds = np.load(f)
              sim_scores = []
              for embed in concept_embeds:
+                 sim = np.dot(img_embed.flatten()/np.linalg.norm(img_embed),
+                              embed.flatten()/np.linalg.norm(embed))
+                 sim_scores.append(sim)
              sim_scores.sort(reverse=True)
+             similarities[concept_name] = np.mean(sim_scores[:5])
          except Exception as e:
+             print(f"Concept {concept_name} error: {e}")
      if similarities:
+         detected = max(similarities, key=similarities.get)
+         gr.Info(f"Image automatically matched to concept: {detected}")
+         return detected
      return None

  @spaces.GPU
  def get_image_embeds(pil_image, model=clip_model, preproc=preprocess, dev=device):
      image = preproc(pil_image)[np.newaxis, :, :, :]
      with torch.no_grad():
          embeds = model.encode_image(image.to(dev))
      concept_image2=None, concept_name2=None,
      concept_image3=None, concept_name3=None,
      rank1=10, rank2=10, rank3=10,
+     prompt=None, scale=1.0, seed=420, num_inference_steps=50,
+     concpet_from_file_1=None, concpet_from_file_2=None, concpet_from_file_3=None,
+     use_concpet_from_file_1=False, use_concpet_from_file_2=False, use_concpet_from_file_3=False
  ):
+     base_pil = Image.fromarray(base_image).convert("RGB")
+     base_embed = get_image_embeds(base_pil, clip_model, preprocess, device)
+
+     concept_images, concept_descs, ranks = [], [], []
+     skip = [False, False, False]
+
+     # ─── concept 1
+     if concept_image1 is None:
          return None, "Please upload at least one concept image"
+     concept_images.append(concept_image1)
+     if use_concpet_from_file_1 and concpet_from_file_1 is not None:
+         concept_descs.append(concpet_from_file_1); skip[0] = True
+     else:
+         concept_descs.append(CONCEPTS_MAP[concept_name1])
+     ranks.append(rank1)
+
+     # ─── concept 2
      if concept_image2 is not None:
          concept_images.append(concept_image2)
+         if use_concpet_from_file_2 and concpet_from_file_2 is not None:
+             concept_descs.append(concpet_from_file_2); skip[1] = True
          else:
+             concept_descs.append(CONCEPTS_MAP[concept_name2])
+         ranks.append(rank2)
+
+     # ─── concept 3
      if concept_image3 is not None:
          concept_images.append(concept_image3)
+         if use_concpet_from_file_3 and concpet_from_file_3 is not None:
+             concept_descs.append(concpet_from_file_3); skip[2] = True
          else:
+             concept_descs.append(CONCEPTS_MAP[concept_name3])
          ranks.append(rank3)
+
+     concept_embeds, proj_mats = [], []
+     for i, concept in enumerate(concept_descs):
          img_pil = Image.fromarray(concept_images[i]).convert("RGB")
          concept_embeds.append(get_image_embeds(img_pil, clip_model, preprocess, device))
+         if skip[i]:
+             all_embeds = concept
          else:
+             with open(f"./IP_Composer/text_embeddings/{concept}", "rb") as f:
+                 all_embeds = np.load(f)
+         proj_mats.append(compute_dataset_embeds_svd(all_embeds, ranks[i]))
+
      projections_data = [
+         {"embed": e, "projection_matrix": p}
+         for e, p in zip(concept_embeds, proj_mats)
      ]
      modified_images = get_modified_images_embeds_composition(
+         base_embed, projections_data, ip_model,
+         prompt=prompt, scale=scale,
+         num_samples=1, seed=seed, num_inference_steps=num_inference_steps
      )
      return modified_images[0]

  @spaces.GPU
  def get_text_embeddings(concept_file):
      descriptions = load_descriptions(concept_file)
+     embeddings = generate_embeddings(descriptions, clip_model,
+                                      tokenizer, device, batch_size=100)
      return embeddings, True

  def process_and_display(
      base_image,
      concept_image3=None, concept_name3=None,
      rank1=30, rank2=30, rank3=30,
      prompt=None, scale=1.0, seed=0, num_inference_steps=50,
+     concpet_from_file_1=None, concpet_from_file_2=None, concpet_from_file_3=None,
+     use_concpet_from_file_1=False, use_concpet_from_file_2=False, use_concpet_from_file_3=False
  ):
      if base_image is None:
          raise gr.Error("Please upload a base image")
      if concept_image1 is None:
          raise gr.Error("Choose at least one concept image")

+     return process_images(
+         base_image, concept_image1, concept_name1,
          concept_image2, concept_name2,
          concept_image3, concept_name3,
+         rank1, rank2, rank3,
          prompt, scale, seed, num_inference_steps,
+         concpet_from_file_1, concpet_from_file_2, concpet_from_file_3,
+         use_concpet_from_file_1, use_concpet_from_file_2, use_concpet_from_file_3
      )

+ # ----------------------------------------------------------
+ # 8 · 💄 THEME & CSS UPGRADE
+ # ----------------------------------------------------------
+ demo_theme = Soft(                       # ★ NEW
+     primary_hue="purple",
+     font=[gr.themes.GoogleFont("Inter")]
+ )
  css = """
+ body{
+   background:#0f0c29;
+   background:linear-gradient(135deg,#0f0c29,#302b63,#24243e);
  }
+ #header{ text-align:center;
+   padding:24px 0 8px;
+   font-weight:700;
+   font-size:2.1rem;
+   color:#ffffff;}
+ .gradio-container{max-width:1024px !important;margin:0 auto}
+ .card{
+   border-radius:18px;
+   background:#ffffff0d;
+   padding:18px 22px;
+   backdrop-filter:blur(6px);
  }
+ .gr-image,.gr-video{border-radius:14px}
+ .gr-image:hover{box-shadow:0 0 0 4px #a855f7}
  """

+ # ----------------------------------------------------------
+ # 9 · 🖼️ Demo UI
+ # ----------------------------------------------------------
+ example_gallery = [
+     ['./IP_Composer/assets/patterns/base.jpg',  "Patterns demo"],
+     ['./IP_Composer/assets/flowers/base.png',   "Flowers demo"],
+     ['./IP_Composer/assets/materials/base.png', "Material demo"],
+ ]

+ with gr.Blocks(css=css, theme=demo_theme) as demo:
+     # ─── Header
+     gr.Markdown("<div id='header'>🌅 IP-Composer "
+                 "<sup style='font-size:14px'>SDXL</sup></div>")

+     # ─── States for custom concepts
+     concpet_from_file_1 = gr.State()
+     concpet_from_file_2 = gr.State()
+     concpet_from_file_3 = gr.State()
+     use_concpet_from_file_1 = gr.State()
+     use_concpet_from_file_2 = gr.State()
+     use_concpet_from_file_3 = gr.State()

+     # ─── Main layout
+     with gr.Row(equal_height=True):
+         # Base image card
+         with gr.Column(elem_classes="card"):
+             base_image = gr.Image(label="Base Image (Required)",
+                                   type="numpy", height=400, width=400)

+         # Concept cards (1 · 2 · 3)
+         for idx in (1, 2, 3):
+             with gr.Column(elem_classes="card"):
+                 locals()[f"concept_image{idx}"] = gr.Image(
+                     label=f"Concept Image {idx}" if idx == 1 else f"Concept {idx} (Optional)",
+                     type="numpy", height=400, width=400
+                 )
+                 locals()[f"concept_name{idx}"] = gr.Dropdown(
+                     concept_options, label=f"Concept {idx}",
+                     value=None if idx != 1 else "age",
+                     info="Pick concept type"
+                 )
+                 with gr.Accordion("💡 Or use a new concept 👇", open=False):
+                     gr.Markdown("1. Upload a file with **>100** text variations<br>"
+                                 "2. Tip: Ask an LLM to list variations.")
+                     if idx == 1:
+                         concept_file_1 = gr.File(label="Concept variations",
+                                                  file_types=["text"])
+                     elif idx == 2:
+                         concept_file_2 = gr.File(label="Concept variations",
+                                                  file_types=["text"])
+                     else:
+                         concept_file_3 = gr.File(label="Concept variations",
+                                                  file_types=["text"])

+     # ─── Advanced options card (full width)
+     with gr.Column(elem_classes="card"):
+         with gr.Accordion("⚙️ Advanced options", open=False):
+             prompt = gr.Textbox(label="Guidance Prompt (Optional)",
+                                 placeholder="Optional text prompt to guide generation")
+             num_inference_steps = gr.Slider(1, 50, value=30, step=1,
+                                             label="Num steps")
+             with gr.Row():
+                 scale = gr.Slider(0.1, 2.0, value=1.0, step=0.1, label="Scale")
+                 randomize_seed = gr.Checkbox(True, label="Randomize seed")
+                 seed = gr.Number(value=0, label="Seed", precision=0)
+             gr.Markdown("If a concept is not showing enough, **increase rank** ⬇️")
+             with gr.Row():
+                 rank1 = gr.Slider(1, 150, value=30, step=1, label="Rank concept 1")
+                 rank2 = gr.Slider(1, 150, value=30, step=1, label="Rank concept 2")
+                 rank3 = gr.Slider(1, 150, value=30, step=1, label="Rank concept 3")

+     # ─── Output & Generate button
+     with gr.Column(elem_classes="card"):
+         output_image = gr.Image(show_label=False, height=480)
+         submit_btn = gr.Button("🔮 Generate", variant="primary", size="lg")
+
+     # ─── Ready-made Gallery
+     gr.Markdown("### 🔥 Ready-made examples")
+     gr.Gallery(example_gallery, label="Click to preview",
+                columns=[3], height="auto")
+
+     # ─── Example usage (kept for quick test)
+     gr.Examples(
          examples,
+         inputs=[base_image, concept_image1, concept_name1,
                  concept_image2, concept_name2,
                  concept_image3, concept_name3,
+                 rank1, rank2, rank3,
                  prompt, scale, seed, num_inference_steps],
          outputs=[output_image],
          fn=generate_examples,
          cache_examples=False
+     )
+
+     # ─── File upload triggers
+     concept_file_1.upload(get_text_embeddings, [concept_file_1],
+                           [concpet_from_file_1, use_concpet_from_file_1])
+     concept_file_2.upload(get_text_embeddings, [concept_file_2],
+                           [concpet_from_file_2, use_concpet_from_file_2])
+     concept_file_3.upload(get_text_embeddings, [concept_file_3],
+                           [concpet_from_file_3, use_concpet_from_file_3])
+     concept_file_1.delete(lambda x: False, [concept_file_1],
+                           [use_concpet_from_file_1])
+     concept_file_2.delete(lambda x: False, [concept_file_2],
+                           [use_concpet_from_file_2])
+     concept_file_3.delete(lambda x: False, [concept_file_3],
+                           [use_concpet_from_file_3])

+     # ─── Dropdown auto-rank
+     concept_name1.select(change_rank_default, [concept_name1], [rank1])
+     concept_name2.select(change_rank_default, [concept_name2], [rank2])
+     concept_name3.select(change_rank_default, [concept_name3], [rank3])

+     # ─── Auto-match concept type on image upload
+     concept_image1.upload(match_image_to_concept, [concept_image1], [concept_name1])
+     concept_image2.upload(match_image_to_concept, [concept_image2], [concept_name2])
+     concept_image3.upload(match_image_to_concept, [concept_image3], [concept_name3])

+     # ─── Generate click chain
+     submit_btn.click(randomize_seed_fn, [seed, randomize_seed], seed) \
+              .then(process_and_display,
+                    [base_image, concept_image1, concept_name1,
+                     concept_image2, concept_name2,
+                     concept_image3, concept_name3,
+                     rank1, rank2, rank3,
+                     prompt, scale, seed, num_inference_steps,
+                     concpet_from_file_1, concpet_from_file_2, concpet_from_file_3,
+                     use_concpet_from_file_1, use_concpet_from_file_2, use_concpet_from_file_3],
+                    [output_image])

+ # ─────────────────────────────
+ # 10 · Launch
+ # ─────────────────────────────
  if __name__ == "__main__":
+     demo.launch()
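For readers skimming the diff, the two numeric steps the updated script leans on are top-5 cosine-similarity concept matching (match_image_to_concept) and a per-concept rank-r SVD projection (compute_dataset_embeds_svd, imported from IP_Composer.perform_swap). The snippet below is a standalone illustration with dummy arrays, not code from the repo; in particular, the rank-r projector is only a plausible reading of what compute_dataset_embeds_svd computes, shown here for intuition.

import numpy as np

rng = np.random.default_rng(0)
img_embed = rng.normal(size=(1, 1024))         # stand-in for a CLIP image embedding
concept_embeds = rng.normal(size=(150, 1024))  # stand-in for one concept's text embeddings

# 1) Concept matching: mean of the top-5 cosine similarities,
#    mirroring the loop in match_image_to_concept above.
sims = concept_embeds @ img_embed.ravel() / (
    np.linalg.norm(concept_embeds, axis=1) * np.linalg.norm(img_embed)
)
top5_score = float(np.sort(sims)[-5:].mean())

# 2) Rank-r concept subspace (assumption, for illustration only): project onto
#    the span of the top-r right singular vectors of the text-embedding matrix.
r = 30
_, _, vt = np.linalg.svd(concept_embeds, full_matrices=False)
projection_matrix = vt[:r].T @ vt[:r]      # (1024, 1024) projector onto the concept subspace
projected = img_embed @ projection_matrix  # concept-specific component of the embedding

print(top5_score, projected.shape)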