linoyts HF Staff committed on
Commit
37d085c
·
verified ·
1 Parent(s): 39886f2
Files changed (1) hide show
  1. app.py +366 -54
app.py CHANGED
@@ -4,8 +4,9 @@ import random
4
  import torch
5
  import spaces
6
  import os
 
7
 
8
- from PIL import Image
9
  import torch
10
  import math
11
 
@@ -138,6 +139,130 @@ Return only the rewritten instruction text directly, without JSON formatting or
138
 
139
  return polish_prompt_hf(full_prompt, SYSTEM_PROMPT)
140
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
 
142
  # --- Model Loading ---
143
  dtype = torch.bfloat16
@@ -146,18 +271,57 @@ pipe = QwenImageEditPipeline.from_pretrained("Qwen/Qwen-Image-Edit", torch_dtype
146
  pipe.transformer.__class__ = QwenImageTransformer2DModel
147
  pipe.transformer.set_attn_processor(QwenDoubleStreamAttnProcessorFA3())
148
 
149
-
150
  # --- Ahead-of-time compilation ---
151
  optimize_pipeline_(pipe, image=Image.new("RGB", (1024, 1024)), prompt="prompt")
152
 
153
  # --- UI Constants and Helpers ---
154
  MAX_SEED = np.iinfo(np.int32).max
155
 
156
- # --- Main Inference Function (with hardcoded negative prompt) ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
  @spaces.GPU(duration=120)
158
  def infer(
159
  image,
160
  prompt,
 
 
 
 
 
 
 
 
 
 
161
  seed=42,
162
  randomize_seed=False,
163
  true_guidance_scale=4.0,
@@ -166,7 +330,7 @@ def infer(
166
  progress=gr.Progress(track_tqdm=True),
167
  ):
168
  """
169
- Generates an image using the local Qwen-Image diffusers pipeline.
170
  """
171
  # Hardcode the negative prompt as requested
172
  negative_prompt = " "
@@ -177,7 +341,7 @@ def infer(
177
  # Set up the generator for reproducibility
178
  generator = torch.Generator(device=device).manual_seed(seed)
179
 
180
- print(f"Calling pipeline with prompt: '{prompt}'")
181
  print(f"Negative Prompt: '{negative_prompt}'")
182
  print(f"Seed: {seed}, Steps: {num_inference_steps}")
183
 
@@ -185,9 +349,27 @@ def infer(
185
  prompt = polish_prompt(prompt, image)
186
  print(f"Rewritten Prompt: {prompt}")
187
 
188
- # Generate the image
189
- image = pipe(
190
- image,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191
  prompt=prompt,
192
  negative_prompt=negative_prompt,
193
  num_inference_steps=num_inference_steps,
@@ -195,7 +377,7 @@ def infer(
195
  true_cfg_scale=true_guidance_scale,
196
  ).images[0]
197
 
198
- return image, seed
199
 
200
  # --- Examples and UI Layout ---
201
  examples = []
@@ -206,6 +388,12 @@ css = """
206
  max-width: 1024px;
207
  }
208
  #edit_text{margin-top: -62px !important}
 
 
 
 
 
 
209
  """
210
 
211
  with gr.Blocks(css=css) as demo:
@@ -216,59 +404,174 @@ with gr.Blocks(css=css) as demo:
216
  </div>
217
  """)
218
  gr.Markdown("""
 
 
 
 
219
  [Learn more](https://github.com/QwenLM/Qwen-Image) about the Qwen-Image series.
220
- Try on [Qwen Chat](https://chat.qwen.ai/), or [download model](https://huggingface.co/Qwen/Qwen-Image-Edit) to run locally with ComfyUI or diffusers.
221
  """)
 
222
  with gr.Row():
223
  with gr.Column():
224
- input_image = gr.Image(label="Input Image", show_label=False, type="pil")
 
225
  prompt = gr.Text(
226
  label="Prompt",
227
- show_label=False,
228
- placeholder="describe the edit instruction",
229
  container=False,
230
  )
231
- run_button = gr.Button("Edit!", variant="primary")
232
-
233
- result = gr.Image(label="Result", show_label=False, type="pil")
234
-
235
- with gr.Accordion("Advanced Settings", open=False):
236
- # Negative prompt UI element is removed here
237
-
238
- seed = gr.Slider(
239
- label="Seed",
240
- minimum=0,
241
- maximum=MAX_SEED,
242
- step=1,
243
- value=0,
244
- )
245
-
246
- randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
247
-
248
- with gr.Row():
249
-
250
- true_guidance_scale = gr.Slider(
251
- label="True guidance scale",
252
- minimum=1.0,
253
- maximum=10.0,
254
- step=0.1,
255
- value=1.0
256
- )
257
-
258
- num_inference_steps = gr.Slider(
259
- label="Number of inference steps",
260
- minimum=1,
261
- maximum=50,
262
- step=1,
263
- value=50,
264
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
265
 
266
- rewrite_prompt = gr.Checkbox(
267
- label="Enhance prompt (using HF Inference)",
268
- value=True
269
- )
270
-
271
- # gr.Examples(examples=examples, inputs=[prompt], outputs=[result, seed], fn=infer, cache_examples=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
272
 
273
  gr.on(
274
  triggers=[run_button.click, prompt.submit],
@@ -276,7 +579,16 @@ with gr.Blocks(css=css) as demo:
276
  inputs=[
277
  input_image,
278
  prompt,
279
- # negative_prompt is no longer an input from the UI
 
 
 
 
 
 
 
 
 
280
  seed,
281
  randomize_seed,
282
  true_guidance_scale,
 
4
  import torch
5
  import spaces
6
  import os
7
+ import json
8
 
9
+ from PIL import Image, ImageDraw
10
  import torch
11
  import math
12
 
 
139
 
140
  return polish_prompt_hf(full_prompt, SYSTEM_PROMPT)
141
 
142
+ # --- Outpainting Functions ---
143
def can_expand(source_width, source_height, target_width, target_height, alignment):
    """Report whether the source leaves room to grow along the aligned axis.

    A "Left"/"Right" alignment needs the source to be narrower than the
    target; a "Top"/"Bottom" alignment needs it to be shorter than the
    target. Any other alignment (e.g. "Middle") is always expandable.

    Returns:
        bool: False when the aligned axis is already filled, True otherwise.
    """
    width_filled = alignment in ("Left", "Right") and source_width >= target_width
    height_filled = alignment in ("Top", "Bottom") and source_height >= target_height
    return not (width_filled or height_filled)
150
+
151
def prepare_image_and_mask(image, width, height, overlap_percentage, resize_option, custom_resize_percentage, alignment, overlap_left, overlap_right, overlap_top, overlap_bottom):
    """Place the source image on a white canvas and build the outpainting mask.

    The source is scaled to fit inside ``(width, height)``, optionally shrunk
    further by ``resize_option``, pasted onto a white background according to
    ``alignment``, and a matching mask is produced.

    Args:
        image: PIL image to extend.
        width: Target canvas width in pixels.
        height: Target canvas height in pixels.
        overlap_percentage: Size of the overlap band, as a percentage of the
            resized source's width/height.
        resize_option: One of "Full", "50%", "33%", "25%"; any other value
            (e.g. "Custom") selects ``custom_resize_percentage``.
        custom_resize_percentage: Percentage used when ``resize_option`` is
            not one of the presets.
        alignment: "Middle", "Left", "Right", "Top" or "Bottom".
        overlap_left, overlap_right, overlap_top, overlap_bottom: Whether the
            overlap band is applied on that side of the source. A side
            without overlap is inset by a 2 px patch instead, unless the
            source is aligned to that side, in which case there is no inset.

    Returns:
        tuple: ``(background, mask)``. ``background`` is an RGB image of the
        target size with the source pasted on white. ``mask`` is an "L"
        image of the same size: 0 inside the kept rectangle of the source,
        255 everywhere else.

    Raises:
        ValueError: If ``alignment`` is not one of the supported values.
    """
    valid_alignments = ("Middle", "Left", "Right", "Top", "Bottom")
    # Fail early with a clear message; otherwise the margins below would
    # never be assigned and the function would die with UnboundLocalError.
    if alignment not in valid_alignments:
        raise ValueError(
            f"Unknown alignment {alignment!r}; expected one of {valid_alignments}"
        )

    target_size = (width, height)

    # Dimensions of the source once scaled to fit within the target size.
    scale_factor = min(target_size[0] / image.width, target_size[1] / image.height)
    fitted_width = int(image.width * scale_factor)
    fitted_height = int(image.height * scale_factor)

    # Translate the resize option into a percentage of the fitted size.
    preset_percentages = {"Full": 100, "50%": 50, "33%": 33, "25%": 25}
    resize_percentage = preset_percentages.get(resize_option, custom_resize_percentage)
    resize_factor = resize_percentage / 100

    # Final source size, never smaller than 64 pixels on either side.
    new_width = max(int(fitted_width * resize_factor), 64)
    new_height = max(int(fitted_height * resize_factor), 64)

    # Resample once, straight to the final size, instead of fitting first and
    # shrinking afterwards: same dimensions, no second round of filtering.
    source = image.resize((new_width, new_height), Image.LANCZOS)

    # Overlap in pixels, with a minimum of 1 pixel.
    overlap_x = max(int(new_width * (overlap_percentage / 100)), 1)
    overlap_y = max(int(new_height * (overlap_percentage / 100)), 1)

    # Top-left corner of the pasted source for each alignment.
    centered_x = (target_size[0] - new_width) // 2
    centered_y = (target_size[1] - new_height) // 2
    margins = {
        "Middle": (centered_x, centered_y),
        "Left": (0, centered_y),
        "Right": (target_size[0] - new_width, centered_y),
        "Top": (centered_x, 0),
        "Bottom": (centered_x, target_size[1] - new_height),
    }
    margin_x, margin_y = margins[alignment]

    # Keep the pasted source inside the canvas.
    margin_x = max(0, min(margin_x, target_size[0] - new_width))
    margin_y = max(0, min(margin_y, target_size[1] - new_height))

    # White canvas with the resized source pasted at the computed position.
    background = Image.new('RGB', target_size, (255, 255, 255))
    background.paste(source, (margin_x, margin_y))

    # Mask starts fully white (255); the kept region is painted black (0).
    mask = Image.new('L', target_size, 255)
    mask_draw = ImageDraw.Draw(mask)

    # A side without overlap is inset by a small patch, except on the side
    # the source is aligned to, where no inset is applied.
    white_gaps_patch = 2
    left_gap = 0 if alignment == "Left" else white_gaps_patch
    right_gap = 0 if alignment == "Right" else white_gaps_patch
    top_gap = 0 if alignment == "Top" else white_gaps_patch
    bottom_gap = 0 if alignment == "Bottom" else white_gaps_patch

    left_edge = margin_x + (overlap_x if overlap_left else left_gap)
    right_edge = margin_x + new_width - (overlap_x if overlap_right else right_gap)
    top_edge = margin_y + (overlap_y if overlap_top else top_gap)
    bottom_edge = margin_y + new_height - (overlap_y if overlap_bottom else bottom_gap)

    # Guard against an inverted rectangle when the insets from opposite
    # sides add up to more than the source size.
    right_edge = max(right_edge, left_edge)
    bottom_edge = max(bottom_edge, top_edge)

    # Draw the kept region.
    mask_draw.rectangle([
        (left_edge, top_edge),
        (right_edge, bottom_edge)
    ], fill=0)

    return background, mask
247
+
248
def preview_image_and_mask(image, width, height, overlap_percentage, resize_option, custom_resize_percentage, alignment, overlap_left, overlap_right, overlap_top, overlap_bottom):
    """Render the prepared canvas with the mask shown as a red tint.

    Takes the same arguments as ``prepare_image_and_mask`` and forwards them
    unchanged.

    Returns:
        An RGBA PIL image in which every pixel where the mask is non-zero is
        tinted semi-transparent red, or ``None`` when ``image`` is ``None``.
    """
    # Nothing to preview without an input image; return None instead of
    # failing on the attribute access inside prepare_image_and_mask.
    if image is None:
        return None

    background, mask = prepare_image_and_mask(image, width, height, overlap_percentage, resize_option, custom_resize_percentage, alignment, overlap_left, overlap_right, overlap_top, overlap_bottom)

    # Base layer for compositing; convert() already returns a new image.
    preview = background.convert('RGBA')

    # Semi-transparent red layer (alpha 64, i.e. 25% opacity).
    red_overlay = Image.new('RGBA', background.size, (255, 0, 0, 64))

    # Copy the red layer onto a fully transparent one through the mask:
    # only the white (255) mask pixels receive the tint, while the black
    # kept region stays transparent.
    red_mask = Image.new('RGBA', background.size, (0, 0, 0, 0))
    red_mask.paste(red_overlay, (0, 0), mask)

    # Overlay the red tint on the background.
    return Image.alpha_composite(preview, red_mask)
266
 
267
  # --- Model Loading ---
268
  dtype = torch.bfloat16
 
271
  pipe.transformer.__class__ = QwenImageTransformer2DModel
272
  pipe.transformer.set_attn_processor(QwenDoubleStreamAttnProcessorFA3())
273
 
 
274
  # --- Ahead-of-time compilation ---
275
  optimize_pipeline_(pipe, image=Image.new("RGB", (1024, 1024)), prompt="prompt")
276
 
277
  # --- UI Constants and Helpers ---
278
  MAX_SEED = np.iinfo(np.int32).max
279
 
280
def preload_presets(target_ratio, ui_width, ui_height):
    """Map the selected aspect ratio to width/height values and a panel update.

    A preset ratio yields its fixed dimensions plus a no-op ``gr.update()``.
    "Custom" echoes back the current ``ui_width``/``ui_height`` and asks the
    third output to open. Any other value falls through and returns ``None``.
    """
    preset_sizes = {
        "9:16": (720, 1280),
        "16:9": (1280, 720),
        "1:1": (1024, 1024),
    }

    if target_ratio in preset_sizes:
        preset_width, preset_height = preset_sizes[target_ratio]
        return preset_width, preset_height, gr.update()

    if target_ratio == "Custom":
        return ui_width, ui_height, gr.update(open=True)

    return None
296
+
297
def select_the_right_preset(user_width, user_height):
    """Return the ratio label matching the given dimensions.

    Exact matches for 720x1280, 1280x720 and 1024x1024 map to "9:16",
    "16:9" and "1:1" respectively; anything else is "Custom".
    """
    labels_by_size = {
        (720, 1280): "9:16",
        (1280, 720): "16:9",
        (1024, 1024): "1:1",
    }
    return labels_by_size.get((user_width, user_height), "Custom")
306
+
307
def toggle_custom_resize_slider(resize_option):
    """Return an update that shows its target only when "Custom" is selected."""
    is_custom = resize_option == "Custom"
    return gr.update(visible=is_custom)
309
+
310
+ # --- Main Inference Function (with outpainting preprocessing) ---
311
  @spaces.GPU(duration=120)
312
  def infer(
313
  image,
314
  prompt,
315
+ width,
316
+ height,
317
+ overlap_percentage,
318
+ resize_option,
319
+ custom_resize_percentage,
320
+ alignment,
321
+ overlap_left,
322
+ overlap_right,
323
+ overlap_top,
324
+ overlap_bottom,
325
  seed=42,
326
  randomize_seed=False,
327
  true_guidance_scale=4.0,
 
330
  progress=gr.Progress(track_tqdm=True),
331
  ):
332
  """
333
+ Generates an outpainted image using the Qwen-Image-Edit pipeline.
334
  """
335
  # Hardcode the negative prompt as requested
336
  negative_prompt = " "
 
341
  # Set up the generator for reproducibility
342
  generator = torch.Generator(device=device).manual_seed(seed)
343
 
344
+ print(f"Original Prompt: '{prompt}'")
345
  print(f"Negative Prompt: '{negative_prompt}'")
346
  print(f"Seed: {seed}, Steps: {num_inference_steps}")
347
 
 
349
  prompt = polish_prompt(prompt, image)
350
  print(f"Rewritten Prompt: {prompt}")
351
 
352
+ # Prepare the image with white margins for outpainting
353
+ outpaint_image, mask = prepare_image_and_mask(
354
+ image, width, height, overlap_percentage,
355
+ resize_option, custom_resize_percentage, alignment,
356
+ overlap_left, overlap_right, overlap_top, overlap_bottom
357
+ )
358
+
359
+ # Check if expansion is possible
360
+ if not can_expand(image.width, image.height, width, height, alignment):
361
+ alignment = "Middle"
362
+ outpaint_image, mask = prepare_image_and_mask(
363
+ image, width, height, overlap_percentage,
364
+ resize_option, custom_resize_percentage, "Middle",
365
+ overlap_left, overlap_right, overlap_top, overlap_bottom
366
+ )
367
+
368
+ print(f"Outpaint dimensions: {outpaint_image.size}")
369
+
370
+ # Generate the image with outpainting preprocessing
371
+ result_image = pipe(
372
+ outpaint_image, # Use the preprocessed image with white margins
373
  prompt=prompt,
374
  negative_prompt=negative_prompt,
375
  num_inference_steps=num_inference_steps,
 
377
  true_cfg_scale=true_guidance_scale,
378
  ).images[0]
379
 
380
+ return result_image, seed
381
 
382
  # --- Examples and UI Layout ---
383
  examples = []
 
388
  max-width: 1024px;
389
  }
390
  #edit_text{margin-top: -62px !important}
391
+ .preview-container {
392
+ border: 1px solid #e0e0e0;
393
+ border-radius: 8px;
394
+ padding: 10px;
395
+ margin-top: 10px;
396
+ }
397
  """
398
 
399
  with gr.Blocks(css=css) as demo:
 
404
  </div>
405
  """)
406
  gr.Markdown("""
407
+ ## Qwen-Image Edit with Outpainting
408
+
409
+ Extend your images beyond their original boundaries with intelligent outpainting. The model will generate new content that seamlessly blends with your original image.
410
+
411
  [Learn more](https://github.com/QwenLM/Qwen-Image) about the Qwen-Image series.
412
+ Try on [Qwen Chat](https://chat.qwen.ai/), or [download model](https://huggingface.co/Qwen/Qwen-Image-Edit) to run locally.
413
  """)
414
+
415
  with gr.Row():
416
  with gr.Column():
417
+ input_image = gr.Image(label="Input Image", type="pil")
418
+
419
  prompt = gr.Text(
420
  label="Prompt",
421
+ placeholder="Describe what should appear in the extended areas",
 
422
  container=False,
423
  )
424
+
425
+ with gr.Row():
426
+ target_ratio = gr.Radio(
427
+ label="Target Ratio",
428
+ choices=["9:16", "16:9", "1:1", "Custom"],
429
+ value="16:9",
430
+ scale=2
431
+ )
432
+ alignment_dropdown = gr.Dropdown(
433
+ choices=["Middle", "Left", "Right", "Top", "Bottom"],
434
+ value="Middle",
435
+ label="Alignment"
436
+ )
437
+
438
+ run_button = gr.Button("Outpaint!", variant="primary")
439
+
440
+ with gr.Accordion("Outpainting Settings", open=False) as settings_panel:
441
+ with gr.Row():
442
+ width_slider = gr.Slider(
443
+ label="Target Width",
444
+ minimum=512,
445
+ maximum=2048,
446
+ step=8,
447
+ value=1280,
448
+ )
449
+ height_slider = gr.Slider(
450
+ label="Target Height",
451
+ minimum=512,
452
+ maximum=2048,
453
+ step=8,
454
+ value=720,
455
+ )
456
+
457
+ with gr.Group():
458
+ overlap_percentage = gr.Slider(
459
+ label="Mask overlap (%)",
460
+ minimum=1,
461
+ maximum=50,
462
+ value=10,
463
+ step=1,
464
+ info="Controls the blending area between original and new content"
465
+ )
466
+
467
+ with gr.Row():
468
+ overlap_top = gr.Checkbox(label="Overlap Top", value=True)
469
+ overlap_right = gr.Checkbox(label="Overlap Right", value=True)
470
+ with gr.Row():
471
+ overlap_left = gr.Checkbox(label="Overlap Left", value=True)
472
+ overlap_bottom = gr.Checkbox(label="Overlap Bottom", value=True)
473
+
474
+ with gr.Row():
475
+ resize_option = gr.Radio(
476
+ label="Resize input image",
477
+ choices=["Full", "50%", "33%", "25%", "Custom"],
478
+ value="Full",
479
+ info="How much of the target canvas the original image should occupy"
480
+ )
481
+ custom_resize_percentage = gr.Slider(
482
+ label="Custom resize (%)",
483
+ minimum=1,
484
+ maximum=100,
485
+ step=1,
486
+ value=50,
487
+ visible=False
488
+ )
489
+
490
+ preview_button = gr.Button("Preview alignment and mask", variant="secondary")
491
+
492
+ with gr.Accordion("Advanced Settings", open=False):
493
+ seed = gr.Slider(
494
+ label="Seed",
495
+ minimum=0,
496
+ maximum=MAX_SEED,
497
+ step=1,
498
+ value=0,
499
+ )
500
+
501
+ randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
502
+
503
+ with gr.Row():
504
+ true_guidance_scale = gr.Slider(
505
+ label="True guidance scale",
506
+ minimum=1.0,
507
+ maximum=10.0,
508
+ step=0.1,
509
+ value=1.0
510
+ )
511
+
512
+ num_inference_steps = gr.Slider(
513
+ label="Number of inference steps",
514
+ minimum=1,
515
+ maximum=50,
516
+ step=1,
517
+ value=50,
518
+ )
519
+
520
+ rewrite_prompt = gr.Checkbox(
521
+ label="Enhance prompt (using HF Inference)",
522
+ value=True
523
+ )
524
 
525
+ with gr.Column():
526
+ result = gr.Image(label="Result", type="pil")
527
+
528
+ with gr.Column(visible=False) as preview_container:
529
+ preview_image = gr.Image(label="Preview (red area will be generated)", type="pil")
530
+
531
+ # Event handlers
532
+ target_ratio.change(
533
+ fn=preload_presets,
534
+ inputs=[target_ratio, width_slider, height_slider],
535
+ outputs=[width_slider, height_slider, settings_panel],
536
+ queue=False,
537
+ )
538
+
539
+ width_slider.change(
540
+ fn=select_the_right_preset,
541
+ inputs=[width_slider, height_slider],
542
+ outputs=[target_ratio],
543
+ queue=False,
544
+ )
545
+
546
+ height_slider.change(
547
+ fn=select_the_right_preset,
548
+ inputs=[width_slider, height_slider],
549
+ outputs=[target_ratio],
550
+ queue=False,
551
+ )
552
+
553
+ resize_option.change(
554
+ fn=toggle_custom_resize_slider,
555
+ inputs=[resize_option],
556
+ outputs=[custom_resize_percentage],
557
+ queue=False,
558
+ )
559
+
560
+ preview_button.click(
561
+ fn=lambda: gr.update(visible=True),
562
+ inputs=None,
563
+ outputs=[preview_container],
564
+ queue=False,
565
+ ).then(
566
+ fn=preview_image_and_mask,
567
+ inputs=[
568
+ input_image, width_slider, height_slider, overlap_percentage,
569
+ resize_option, custom_resize_percentage, alignment_dropdown,
570
+ overlap_left, overlap_right, overlap_top, overlap_bottom
571
+ ],
572
+ outputs=preview_image,
573
+ queue=False,
574
+ )
575
 
576
  gr.on(
577
  triggers=[run_button.click, prompt.submit],
 
579
  inputs=[
580
  input_image,
581
  prompt,
582
+ width_slider,
583
+ height_slider,
584
+ overlap_percentage,
585
+ resize_option,
586
+ custom_resize_percentage,
587
+ alignment_dropdown,
588
+ overlap_left,
589
+ overlap_right,
590
+ overlap_top,
591
+ overlap_bottom,
592
  seed,
593
  randomize_seed,
594
  true_guidance_scale,