Commit fef98ae
Author: Tony Lian
Parent: 853e04d

Update default steps

Files changed (1): app.py (+3, -3)
app.py CHANGED
@@ -121,7 +121,7 @@ def get_baseline_image(prompt, seed=0):
         prompt = prompt_placeholder
 
     scheduler_key = "dpm_scheduler"
-    num_inference_steps = 50
+    num_inference_steps = 20
 
     image_np = run_baseline(prompt, bg_seed=seed, scheduler_key=scheduler_key, num_inference_steps=num_inference_steps)
     return [image_np]
@@ -202,7 +202,7 @@ html = f"""<h1>LLM-grounded Diffusion: Enhancing Prompt Understanding of Text-to
 <p>1. If ChatGPT doesn't generate layout, add/remove the trailing space (added by default) and/or use GPT-4.</p>
 <p>2. You can perform multi-round specification by giving ChatGPT follow-up requests (e.g., make the object boxes bigger).</p>
 <p>3. You can also try prompts in Simplified Chinese. If you want to try prompts in another language, translate the first line of last example to your language.</p>
-<p>4. The diffusion model only runs 50 steps by default in this demo. You can make it run more/fewer steps to get higher quality images or faster generation (or tweak frozen steps/guidance steps for better guidance and coherence).</p>
+<p>4. The diffusion model only runs 20 steps by default in this demo. You can make it run more steps to get higher quality images (or tweak frozen steps/guidance steps for better guidance and coherence).</p>
 <p>5. Duplicate this space and add GPU or clone the space and run locally to skip the queue and run our model faster. (<b>Currently we are using a T4 GPU on this space, which is quite slow, and you can add a A10G to make it 5x faster</b>) {duplicate_html}</p>
 <br/>
 <p>Implementation note: In this demo, we replace the attention manipulation in our layout-guided Stable Diffusion described in our paper with GLIGEN due to much faster inference speed (<b>FlashAttention supported, no backprop needed</b> during inference). Compared to vanilla GLIGEN, we have better coherence. Other parts of text-to-image pipeline, including single object generation and SAM, remain the same. The settings and examples in the prompt are simplified in this demo.</p>
@@ -238,7 +238,7 @@ with gr.Blocks(
         with gr.Column(scale=1):
             response = gr.Textbox(lines=8, label="Paste ChatGPT response here (no original caption needed)", placeholder=layout_placeholder)
             overall_prompt_override = gr.Textbox(lines=2, label="Prompt for overall generation (optional but recommended)", placeholder="You can put your input prompt for layout generation here, helpful if your scene cannot be represented by background prompt and boxes only, e.g., with object interactions. If left empty: background prompt with [objects].", value="")
-            num_inference_steps = gr.Slider(1, 250, value=50, step=1, label="Number of denoising steps (set to 20 to trade quality for faster generation)")
+            num_inference_steps = gr.Slider(1, 250, value=20, step=1, label="Number of denoising steps (set to >=50 for higher generation quality)")
             seed = gr.Slider(0, 10000, value=0, step=1, label="Seed")
             with gr.Accordion("Advanced options (play around for better generation)", open=False):
                 frozen_step_ratio = gr.Slider(0, 1, value=0.4, step=0.1, label="Foreground frozen steps ratio (higher: preserve object attributes; lower: higher coherence; set to 0: (almost) equivalent to vanilla GLIGEN except details)")
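
Context on why 20 steps is a reasonable new default: the demo's scheduler_key is "dpm_scheduler", and DPM-Solver-style multistep schedulers typically reach usable sample quality in roughly 15-25 denoising steps, whereas older samplers often need around 50. Below is a minimal sketch of the same trade-off using a stock diffusers pipeline; the model ID and prompt are placeholder assumptions, and this is not the Space's actual run_baseline implementation.

# Minimal sketch (not the Space's run_baseline): a DPM-Solver multistep
# scheduler usually yields usable images at ~20 denoising steps.
import torch
from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler

# Model ID is a placeholder assumption, not taken from this commit.
pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
).to("cuda")
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)

# 20 steps: fast preview quality; raise to >=50 for higher quality,
# mirroring the slider hint in the updated app.py.
image = pipe("a photo of a cat on a bench", num_inference_steps=20).images[0]
image.save("preview.png")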