Ahsen Khaliq committed
Commit c62bf4d · 1 Parent(s): cf952b4
Files changed (1):
  1. app.py +13 -10
app.py CHANGED
@@ -183,7 +183,7 @@ args = argparse.Namespace(
     clip_model='ViT-B/32',
     vqgan_config=f'{model_name}.yaml',
     vqgan_checkpoint=f'{model_name}.ckpt',
-    step_size=0.25,
+    step_size=0.15,
     cutn=4,
     cut_pow=1.,
     display_freq=images_interval,
@@ -194,7 +194,7 @@ print('Using device:', device)
 model = load_vqgan_model(args.vqgan_config, args.vqgan_checkpoint).to(device)
 perceptor = clip.load(args.clip_model, jit=False)[0].eval().requires_grad_(False).to(device)
 
-def inference(text):
+def inference(text, seed, step_size):
     texts = text
     target_images = ""
     max_iterations = 100
@@ -214,10 +214,10 @@ def inference(text):
     print('Using texts:', texts)
     if target_images:
         print('Using image prompts:', target_images)
-    if args.seed is None or args.seed == -1:
+    if seed is None or seed == -1:
         seed = torch.seed()
     else:
-        seed = args.seed
+        seed = seed
     torch.manual_seed(seed)
     print('Using seed:', seed)
     # clock=deepcopy(perceptor.visual.positional_embedding.data)
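For context, a minimal sketch of the seed-selection logic this hunk switches from `args.seed` to the new `seed` argument. The helper name `pick_seed` and the `int(...)` cast are illustrative additions, not part of app.py:

import torch

def pick_seed(seed):
    # Hypothetical helper mirroring the hunk above:
    # None or -1 means "draw a fresh random seed".
    if seed is None or seed == -1:
        seed = torch.seed()          # returns the freshly chosen 64-bit seed
    torch.manual_seed(int(seed))     # int() cast in case the UI delivers a float
    return seed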
@@ -262,7 +262,7 @@ def inference(text):
     z = torch.rand_like(z)*2
     z_orig = z.clone()
     z.requires_grad_(True)
-    opt = optim.Adam([z], lr=args.step_size)
+    opt = optim.Adam([z], lr=step_size)
     normalize = transforms.Normalize(mean=[0.48145466, 0.4578275, 0.40821073],
                                      std=[0.26862954, 0.26130258, 0.27577711])
     pMs = []
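The "step size" exposed in the UI is simply the Adam learning rate used to optimize the VQGAN latent z. A toy, self-contained illustration of that pattern; the tensor shape and the loss below are placeholders, not the real CLIP-guided objective:

import torch
from torch import optim

z = torch.randn(1, 256, 16, 16, requires_grad=True)  # placeholder latent shape
step_size = 0.15                                      # the new default from this commit
opt = optim.Adam([z], lr=step_size)

loss = z.pow(2).mean()   # stand-in for the CLIP similarity loss
opt.zero_grad()
loss.backward()
opt.step()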
@@ -343,7 +343,7 @@ def load_image( infilename ) :
     data = np.asarray( img, dtype="int32" )
     return data
 
-def throttled_inference(text):
+def throttled_inference(text, seed, step_size):
     global inferences_running
     current = inferences_running
     if current >= 1:
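This hunk and the next only thread the new seed and step_size arguments through the existing throttle. For readers unfamiliar with it, a minimal sketch of that counter-based guard, written as a hypothetical decorator rather than app.py's inline version:

inferences_running = 0   # module-level counter, as in app.py

def throttled(fn):
    # Hypothetical decorator form of the throttled_inference pattern:
    # refuse to start a second inference, and always release the slot.
    def wrapper(*args, **kwargs):
        global inferences_running
        if inferences_running >= 1:
            raise RuntimeError("Another inference is already running; please retry.")
        inferences_running += 1
        try:
            return fn(*args, **kwargs)
        finally:
            inferences_running -= 1
    return wrapper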
@@ -353,18 +353,21 @@ def throttled_inference(text):
     print(f"Inference starting when we already had {current} running")
     inferences_running += 1
     try:
-        return inference(text)
+        return inference(text, seed, step_size)
     finally:
         print("Inference finished")
         inferences_running -= 1
 
 
 title = "VQGAN + CLIP"
-description = "Gradio demo for VQGAN + CLIP. To use it, simply add your text, or click one of the examples to load them. Read more at the links below. Please click submit only once"
-article = "<p style='text-align: center'>Originally made by Katherine Crowson (https://github.com/crowsonkb, https://twitter.com/RiversHaveWings). The original BigGAN+CLIP method was by https://twitter.com/advadnoun. Added some explanations and modifications by Eleiber#8347, pooling trick by Crimeacs#8222 (https://twitter.com/EarthML1) and the GUI was made with the help of Abulafia#3734. | <a href='https://colab.research.google.com/drive/1ZAus_gn2RhTZWzOWUpPERNC0Q8OhZRTZ'>Colab</a> | <a href='https://github.com/CompVis/taming-transformers'>Taming Transformers Github Repo</a> | <a href='https://github.com/openai/CLIP'>CLIP Github Repo</a> Special Thanks to BoneAmputee (https://twitter.com/BoneAmputee) for suggestions and advice</p>"
+description = "Gradio demo for VQGAN + CLIP. To use it, simply add your text, or click one of the examples to load them. Read more at the links below. Please click submit only once. Results will show up in under a minute."
+article = "<p style='text-align: center'>Originally made by Katherine Crowson (https://github.com/crowsonkb, https://twitter.com/RiversHaveWings). The original BigGAN+CLIP method was by https://twitter.com/advadnoun. Added some explanations and modifications by Eleiber#8347, pooling trick by Crimeacs#8222 (https://twitter.com/EarthML1) and the GUI was made with the help of Abulafia#3734. | <a href='https://colab.research.google.com/drive/1ZAus_gn2RhTZWzOWUpPERNC0Q8OhZRTZ'>Colab</a> | <a href='https://github.com/CompVis/taming-transformers'>Taming Transformers Github Repo</a> | <a href='https://github.com/openai/CLIP'>CLIP Github Repo</a> | Special thanks to BoneAmputee (https://twitter.com/BoneAmputee) for suggestions and advice</p>"
 gr.Interface(
     throttled_inference,
-    gr.inputs.Textbox(label="Input"),
+    [gr.inputs.Textbox(label="Input"),
+    gr.inputs.Number(default=42, label="seed"),
+    gr.inputs.Slider(minimum=0.1, maximum=0.9, default=0.23, label='step size')
+    ],
     gr.outputs.Image(type="numpy", label="Output"),
     title=title,
     description=description,
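With the inputs now passed as a list, Gradio maps them positionally onto throttled_inference(text, seed, step_size). A minimal, self-contained sketch of that wiring using the same legacy gr.inputs API as the diff; the demo function and Textbox output are stand-ins for app.py's image pipeline:

import gradio as gr

def demo_fn(text, seed, step_size):
    # Stand-in for throttled_inference: shows how the three inputs arrive.
    return f"text={text!r}, seed={seed}, step_size={step_size}"

gr.Interface(
    demo_fn,
    [gr.inputs.Textbox(label="Input"),
     gr.inputs.Number(default=42, label="seed"),
     gr.inputs.Slider(minimum=0.1, maximum=0.9, default=0.23, label="step size")],
    gr.outputs.Textbox(label="Output"),
).launch()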
 