Update app.py

app.py CHANGED
@@ -7,7 +7,7 @@ import spaces
 
 import PIL
 from PIL import Image
-from typing import Tuple
+from typing import Tuple, List
 
 import diffusers
 from diffusers.utils import load_image
@@ -21,8 +21,6 @@ from insightface.app import FaceAnalysis
 from style_template import styles
 from pipeline_stable_diffusion_xl_instantid_full import StableDiffusionXLInstantIDPipeline, draw_kps
 
-# from controlnet_aux import OpenposeDetector
-
 import gradio as gr
 
 from depth_anything.dpt import DepthAnything
@@ -58,8 +56,6 @@ app = FaceAnalysis(
 )
 app.prepare(ctx_id=0, det_size=(640, 640))
 
-# openpose = OpenposeDetector.from_pretrained("lllyasviel/ControlNet")
-
 depth_anything = DepthAnything.from_pretrained('LiheYoung/depth_anything_vitl14').to(device).eval()
 
 transform = Compose([
@@ -85,14 +81,10 @@ controlnet_identitynet = ControlNetModel.from_pretrained(
     controlnet_path, torch_dtype=dtype
 )
 
-# controlnet-pose/canny/depth
-# controlnet_pose_model = "thibaud/controlnet-openpose-sdxl-1.0"
+# controlnet-canny/depth
 controlnet_canny_model = "diffusers/controlnet-canny-sdxl-1.0"
 controlnet_depth_model = "diffusers/controlnet-depth-sdxl-1.0-small"
 
-# controlnet_pose = ControlNetModel.from_pretrained(
-#     controlnet_pose_model, torch_dtype=dtype
-# ).to(device)
 controlnet_canny = ControlNetModel.from_pretrained(
     controlnet_canny_model, torch_dtype=dtype
 ).to(device)
@@ -127,12 +119,10 @@ def get_canny_image(image, t1=100, t2=200):
     return Image.fromarray(edges, "L")
 
 controlnet_map = {
-    #"pose": controlnet_pose,
     "canny": controlnet_canny,
    "depth": controlnet_depth,
 }
 controlnet_map_fn = {
-    #"pose": openpose,
     "canny": get_canny_image,
     "depth": get_depth_map,
 }
@@ -180,67 +170,6 @@ def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
 def remove_tips():
     return gr.update(visible=False)
 
-def get_example():
-    case = [
-        [
-            "./examples/yann-lecun_resize.jpg",
-            None,
-            "a man",
-            "Spring Festival",
-            "(lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, deformed cat, deformed, photo, anthropomorphic cat, monochrome, photo, pet collar, gun, weapon, blue, 3d, drones, drone, buildings in background, green",
-        ],
-        [
-            "./examples/musk_resize.jpeg",
-            "./examples/poses/pose2.jpg",
-            "a man flying in the sky in Mars",
-            "Mars",
-            "(lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, deformed cat, deformed, photo, anthropomorphic cat, monochrome, photo, pet collar, gun, weapon, blue, 3d, drones, drone, buildings in background, green",
-        ],
-        [
-            "./examples/sam_resize.png",
-            "./examples/poses/pose4.jpg",
-            "a man doing a silly pose wearing a suite",
-            "Jungle",
-            "(lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, deformed cat, deformed, photo, anthropomorphic cat, monochrome, photo, pet collar, gun, weapon, blue, 3d, drones, drone, buildings in background, gree",
-        ],
-        [
-            "./examples/schmidhuber_resize.png",
-            "./examples/poses/pose3.jpg",
-            "a man sit on a chair",
-            "Neon",
-            "(lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, deformed cat, deformed, photo, anthropomorphic cat, monochrome, photo, pet collar, gun, weapon, blue, 3d, drones, drone, buildings in background, green",
-        ],
-        [
-            "./examples/kaifu_resize.png",
-            "./examples/poses/pose.jpg",
-            "a man",
-            "Vibrant Color",
-            "(lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, deformed cat, deformed, photo, anthropomorphic cat, monochrome, photo, pet collar, gun, weapon, blue, 3d, drones, drone, buildings in background, green",
-        ],
-    ]
-    return case
-
-def run_for_examples(face_file, pose_file, prompt, style, negative_prompt):
-    return generate_image(
-        face_file,
-        pose_file,
-        prompt,
-        negative_prompt,
-        style,
-        20, # num_steps
-        0.8, # identitynet_strength_ratio
-        0.8, # adapter_strength_ratio
-        #0.4, # pose_strength
-        0.3, # canny_strength
-        0.5, # depth_strength
-        ["depth", "canny"], # controlnet_selection
-        5.0, # guidance_scale
-        42, # seed
-        "EulerDiscreteScheduler", # scheduler
-        False, # enable_LCM
-        True, # enable_Face_Region
-    )
-
 def convert_from_cv2_to_image(img: np.ndarray) -> Image:
     return Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
@@ -284,9 +213,12 @@ def apply_style(
     p, n = styles.get(style_name, styles[DEFAULT_STYLE_NAME])
     return p.replace("{prompt}", positive), n + " " + negative
 
+def update_face_gallery(files):
+    return gr.update(value=files, visible=True)
+
 @spaces.GPU
 def generate_image(
-    face_image_path,
+    face_images_path, # Now accepts a list of image paths
     pose_image_path,
     prompt,
     negative_prompt,
@@ -294,7 +226,6 @@ def generate_image(
     num_steps,
     identitynet_strength_ratio,
     adapter_strength_ratio,
-    #pose_strength,
     canny_strength,
     depth_strength,
     controlnet_selection,
@@ -321,9 +252,9 @@ def generate_image(
     scheduler = getattr(diffusers, scheduler_class_name)
     pipe.scheduler = scheduler.from_config(pipe.scheduler.config, **add_kwargs)
 
-    if face_image_path is None:
+    if face_images_path is None or len(face_images_path) == 0:
         raise gr.Error(
-            f"Cannot find any input face image! Please upload the face image"
+            f"Cannot find any input face images! Please upload at least one face image"
         )
 
     if prompt is None:
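Note: in recent Gradio versions, gr.Files hands the handler either None (nothing uploaded) or a list of file paths, so the rewritten guard covers both the missing-input and the empty-upload case before any GPU work is scheduled.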
@@ -332,28 +263,67 @@ def generate_image(
     # apply the style template
     prompt, negative_prompt = apply_style(style_name, prompt, negative_prompt)
 
-    face_image = load_image(face_image_path)
-    face_image = resize_img(face_image, max_side=1024)
-    face_image_cv2 = convert_from_image_to_cv2(face_image)
-    height, width, _ = face_image_cv2.shape
-
-    # Extract face features
-    face_info = app.get(face_image_cv2)
-
-    if len(face_info) == 0:
-        raise gr.Error(
-            f"Unable to detect a face in the image. Please upload a different photo with a clear face."
-        )
-
-    face_info = sorted(
-        face_info,
-        key=lambda x: (x["bbox"][2] - x["bbox"][0]) * x["bbox"][3] - x["bbox"][1],
-    )[
-        -1
-    ]  # only use the maximum face
-    face_emb = face_info["embedding"]
-    face_kps = draw_kps(convert_from_cv2_to_image(face_image_cv2), face_info["kps"])
-    img_controlnet = face_image
+    # Use the first face image for face keypoints and size reference
+    reference_face_path = face_images_path[0] if isinstance(face_images_path, list) else face_images_path
+    reference_face_image = load_image(reference_face_path)
+    reference_face_image = resize_img(reference_face_image, max_side=1024)
+    reference_face_cv2 = convert_from_image_to_cv2(reference_face_image)
+    height, width, _ = reference_face_cv2.shape
+
+    # Initialize a list to collect face embeddings
+    face_embeddings = []
+
+    # Process each face image if multiple images are provided
+    face_image_paths = face_images_path if isinstance(face_images_path, list) else [face_images_path]
+
+    for face_path in face_image_paths:
+        face_img = load_image(face_path)
+        face_img = resize_img(face_img, max_side=1024)
+        face_img_cv2 = convert_from_image_to_cv2(face_img)
+
+        # Extract face features
+        face_info = app.get(face_img_cv2)
+
+        if len(face_info) == 0:
+            print(f"Warning: Unable to detect a face in {face_path}. Skipping this image.")
+            continue
+
+        # Use the largest face in each image
+        face_info = sorted(
+            face_info,
+            key=lambda x: (x["bbox"][2] - x["bbox"][0]) * x["bbox"][3] - x["bbox"][1],
+        )[-1]
+
+        # Collect the embedding
+        face_embeddings.append(torch.tensor(face_info["embedding"]).unsqueeze(0))
+
+    if len(face_embeddings) == 0:
+        raise gr.Error(
+            f"Unable to detect a face in any of the uploaded images. Please upload different photos with clear faces."
+        )
+
+    # Average the face embeddings
+    if len(face_embeddings) == 1:
+        face_emb = face_embeddings[0].squeeze().numpy() # Use as is if only one image
+    else:
+        # Stack and compute mean along the batch dimension
+        face_emb = torch.mean(torch.cat(face_embeddings, dim=0), dim=0).numpy()
+        print(f"Averaged {len(face_embeddings)} face embeddings")
+
+    # Extract keypoints from the reference face for ControlNet
+    reference_face_info = app.get(reference_face_cv2)
+    if len(reference_face_info) == 0:
+        raise gr.Error(
+            f"Unable to detect a face in the reference image for keypoints. Please upload a different photo with a clear face."
+        )
+    reference_face_info = sorted(
+        reference_face_info,
+        key=lambda x: (x["bbox"][2] - x["bbox"][0]) * x["bbox"][3] - x["bbox"][1],
+    )[-1] # Use the largest face
+
+    face_kps = draw_kps(convert_from_cv2_to_image(reference_face_cv2), reference_face_info["kps"])
+    img_controlnet = reference_face_image
+
     if pose_image_path is not None:
         pose_image = load_image(pose_image_path)
         pose_image = resize_img(pose_image, max_side=1024)
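The substantive change in this hunk is identity averaging: one insightface embedding is extracted per uploaded photo, and their per-dimension mean becomes the single identity vector fed to the pipeline. A minimal standalone sketch of just that step, with random 512-d vectors standing in for real insightface embeddings (the 512 dimension is an assumption based on ArcFace's usual output size):

    import torch

    # One stand-in embedding per uploaded photo, shaped (1, 512) like the
    # unsqueezed insightface vectors collected in the loop above.
    face_embeddings = [torch.randn(512).unsqueeze(0) for _ in range(3)]

    if len(face_embeddings) == 1:
        face_emb = face_embeddings[0].squeeze().numpy()  # single photo: use as-is
    else:
        # Stack to (N, 512), then average across the N photos.
        face_emb = torch.mean(torch.cat(face_embeddings, dim=0), dim=0).numpy()

    print(face_emb.shape)  # (512,)

Averaging only makes sense when all photos show the same person; embeddings of different identities would blend into a face that matches no one.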
@@ -383,7 +353,6 @@ def generate_image(
 
     if len(controlnet_selection) > 0:
         controlnet_scales = {
-            #"pose": pose_strength,
             "canny": canny_strength,
             "depth": depth_strength,
         }
@@ -425,9 +394,42 @@ def generate_image(
 
     return images[0], gr.update(visible=True)
 
+def get_example():
+    case = [
+        [
+            "./examples/yann-lecun_resize.jpg",
+            None,
+            "a man",
+            "Spring Festival",
+            "(lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, deformed cat, deformed, photo, anthropomorphic cat, monochrome, photo, pet collar, gun, weapon, blue, 3d, drones, drone, buildings in background, green",
+        ],
+        # Add more examples as needed
+    ]
+    return case
+
+def run_for_examples(face_file, pose_file, prompt, style, negative_prompt):
+    return generate_image(
+        face_file,
+        pose_file,
+        prompt,
+        negative_prompt,
+        style,
+        20, # num_steps
+        0.8, # identitynet_strength_ratio
+        0.8, # adapter_strength_ratio
+        0.3, # canny_strength
+        0.5, # depth_strength
+        ["depth", "canny"], # controlnet_selection
+        5.0, # guidance_scale
+        42, # seed
+        "EulerDiscreteScheduler", # scheduler
+        False, # enable_LCM
+        True, # enable_Face_Region
+    )
+
 # Description
 title = r"""
-<h1 align="center">InstantID: Zero-shot Identity-Preserving Generation in Seconds</h1>
+<h1 align="center">InstantID: Zero-shot Identity-Preserving Generation with Multi-Face Averaging</h1>
 """
 
 article = r"""
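Note that gr.Examples supplies face_file as a single path string rather than a list; the isinstance fallback inside generate_image wraps it, so run_for_examples and cache_examples=True keep working unchanged:

    face_image_paths = face_images_path if isinstance(face_images_path, list) else [face_images_path]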
@@ -449,11 +451,12 @@ If you have any questions, please feel free to open an issue or directly reach us
 """
 
 tips = r"""
-### Usage tips of InstantID
-1. If you're not satisfied with the similarity, try increasing the weight of "IdentityNet Strength" and "Adapter Strength."
-2. If you feel that the saturation is too high, first decrease the Adapter strength. If it remains too high, then decrease the IdentityNet strength.
-3. If you find that text control is not as expected, decrease Adapter strength.
-4. If you find that realistic style is not good enough, go for our Github repo and use a more realistic base model.
+### Usage tips of InstantID with Multi-Face Averaging
+1. Upload multiple photos of the same person for better identity preservation through face embedding averaging.
+2. If you're not satisfied with the similarity, try increasing the weight of "IdentityNet Strength" and "Adapter Strength."
+3. If you feel that the saturation is too high, first decrease the Adapter strength. If it remains too high, then decrease the IdentityNet strength.
+4. If you find that text control is not as expected, decrease Adapter strength.
+5. If you find that realistic style is not good enough, go for our Github repo and use a more realistic base model.
 """
 
 css = """
@@ -466,10 +469,19 @@ with gr.Blocks(css=css) as demo:
         with gr.Row():
             with gr.Column():
                 with gr.Row(equal_height=True):
-                    # upload face image
-                    face_file = gr.Image(
-                        label="Upload a photo of your face", type="filepath"
+                    # Change from single image to multiple files
+                    face_files = gr.Files(
+                        label="Upload photos of your face (1 or more)",
+                        file_types=["image"]
                     )
+
+                    face_gallery = gr.Gallery(
+                        label="Your uploaded face images",
+                        visible=True,
+                        columns=5,
+                        rows=1,
+                        height=150
+                    )
 
                 # prompt
                 prompt = gr.Textbox(
@@ -514,28 +526,21 @@ with gr.Blocks(css=css) as demo:
                     )
                     controlnet_selection = gr.CheckboxGroup(
                         ["canny", "depth"], label="Controlnet", value=[],
-                        info="Use pose for skeleton inference, canny for edge detection, and depth for depth map estimation"
+                        info="Use canny for edge detection, and depth for depth map estimation to control the generation process"
                     )
-                    # pose_strength = gr.Slider(
-                    #     label="Pose strength",
-                    #     minimum=0,
-                    #     maximum=1.5,
-                    #     step=0.05,
-                    #     value=0.40,
-                    # )
                     canny_strength = gr.Slider(
                         label="Canny strength",
                         minimum=0,
                         maximum=1.5,
                         step=0.05,
-                        value=0,
+                        value=0.3,
                     )
                     depth_strength = gr.Slider(
                         label="Depth strength",
                         minimum=0,
                         maximum=1.5,
                         step=0.05,
-                        value=0,
+                        value=0.5,
                     )
                     with gr.Accordion(open=False, label="Advanced Options"):
                         negative_prompt = gr.Textbox(
@@ -586,6 +591,9 @@ with gr.Blocks(css=css) as demo:
                 label="InstantID Usage Tips", value=tips, visible=False
             )
 
+    # Connect file uploads to update the gallery
+    face_files.upload(fn=update_face_gallery, inputs=face_files, outputs=face_gallery)
+
     submit.click(
         fn=remove_tips,
         outputs=usage_tips,
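A self-contained sketch of the Files-to-Gallery preview wiring added above, runnable on its own (it assumes a Gradio 4.x API where gr.Files, gr.Gallery, and the .upload event have these signatures):

    import gradio as gr

    def update_face_gallery(files):
        # `files` arrives as a list of uploaded file paths.
        return gr.update(value=files, visible=True)

    with gr.Blocks() as demo:
        face_files = gr.Files(label="Upload photos of your face (1 or more)", file_types=["image"])
        face_gallery = gr.Gallery(label="Your uploaded face images", columns=5, rows=1, height=150)
        face_files.upload(fn=update_face_gallery, inputs=face_files, outputs=face_gallery)

    demo.launch()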
@@ -598,7 +606,7 @@ with gr.Blocks(css=css) as demo:
     ).then(
         fn=generate_image,
         inputs=[
-            face_file,
+            face_files, # Changed from face_file to face_files
             pose_file,
             prompt,
             negative_prompt,
@@ -606,7 +614,6 @@ with gr.Blocks(css=css) as demo:
             num_steps,
             identitynet_strength_ratio,
             adapter_strength_ratio,
-            #pose_strength,
             canny_strength,
             depth_strength,
             controlnet_selection,
@@ -628,7 +635,7 @@ with gr.Blocks(css=css) as demo:
 
     gr.Examples(
         examples=get_example(),
-        inputs=[face_file, pose_file, prompt, style, negative_prompt],
+        inputs=[face_files, pose_file, prompt, style, negative_prompt],
         fn=run_for_examples,
         outputs=[gallery, usage_tips],
         cache_examples=True,