Ahsen Khaliq committed
Commit c62bf4d · 1 Parent(s): cf952b4
Files changed (1):
  1. app.py +13 -10
app.py CHANGED
@@ -183,7 +183,7 @@ args = argparse.Namespace(
     clip_model='ViT-B/32',
     vqgan_config=f'{model_name}.yaml',
     vqgan_checkpoint=f'{model_name}.ckpt',
-    step_size=0.25,
+    step_size=0.15,
     cutn=4,
     cut_pow=1.,
     display_freq=images_interval,
@@ -194,7 +194,7 @@ print('Using device:', device)
 model = load_vqgan_model(args.vqgan_config, args.vqgan_checkpoint).to(device)
 perceptor = clip.load(args.clip_model, jit=False)[0].eval().requires_grad_(False).to(device)
 
-def inference(text):
+def inference(text, seed, step_size):
     texts = text
     target_images = ""
     max_iterations = 100
@@ -214,10 +214,10 @@ def inference(text):
     print('Using texts:', texts)
     if target_images:
         print('Using image prompts:', target_images)
-    if args.seed is None or args.seed == -1:
+    if seed is None or seed == -1:
         seed = torch.seed()
     else:
-        seed = args.seed
+        seed = seed
     torch.manual_seed(seed)
     print('Using seed:', seed)
     # clock=deepcopy(perceptor.visual.positional_embedding.data)
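For context, a minimal sketch of the seed-selection logic this hunk switches from `args.seed` to the new `seed` argument. The helper name `pick_seed` and the `int(...)` cast are illustrative additions, not part of app.py:

import torch

def pick_seed(seed):
    # Hypothetical helper mirroring the hunk above:
    # None or -1 means "draw a fresh random seed".
    if seed is None or seed == -1:
        seed = torch.seed()          # returns the freshly chosen 64-bit seed
    torch.manual_seed(int(seed))     # int() cast in case the UI delivers a float
    return seed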
@@ -262,7 +262,7 @@ def inference(text):
     z = torch.rand_like(z)*2
     z_orig = z.clone()
     z.requires_grad_(True)
-    opt = optim.Adam([z], lr=args.step_size)
+    opt = optim.Adam([z], lr=step_size)
     normalize = transforms.Normalize(mean=[0.48145466, 0.4578275, 0.40821073],
                                      std=[0.26862954, 0.26130258, 0.27577711])
     pMs = []
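The "step size" exposed in the UI is simply the Adam learning rate used to optimize the VQGAN latent z. A toy, self-contained illustration of that pattern; the tensor shape and the loss below are placeholders, not the real CLIP-guided objective:

import torch
from torch import optim

z = torch.randn(1, 256, 16, 16, requires_grad=True)  # placeholder latent shape
step_size = 0.15                                      # the new default from this commit
opt = optim.Adam([z], lr=step_size)

loss = z.pow(2).mean()   # stand-in for the CLIP similarity loss
opt.zero_grad()
loss.backward()
opt.step()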
@@ -343,7 +343,7 @@ def load_image( infilename ) :
     data = np.asarray( img, dtype="int32" )
     return data
 
-def throttled_inference(text):
+def throttled_inference(text, seed, step_size):
     global inferences_running
     current = inferences_running
     if current >= 1:
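This hunk and the next only thread the new seed and step_size arguments through the existing throttle. For readers unfamiliar with it, a minimal sketch of that counter-based guard, written as a hypothetical decorator rather than app.py's inline version:

inferences_running = 0   # module-level counter, as in app.py

def throttled(fn):
    # Hypothetical decorator form of the throttled_inference pattern:
    # refuse to start a second inference, and always release the slot.
    def wrapper(*args, **kwargs):
        global inferences_running
        if inferences_running >= 1:
            raise RuntimeError("Another inference is already running; please retry.")
        inferences_running += 1
        try:
            return fn(*args, **kwargs)
        finally:
            inferences_running -= 1
    return wrapper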
@@ -353,18 +353,21 @@ def throttled_inference(text):
     print(f"Inference starting when we already had {current} running")
     inferences_running += 1
     try:
-        return inference(text)
+        return inference(text, seed, step_size)
     finally:
         print("Inference finished")
         inferences_running -= 1
 
 
 title = "VQGAN + CLIP"
-description = "Gradio demo for VQGAN + CLIP. To use it, simply add your text, or click one of the examples to load them. Read more at the links below. Please click submit only once"
-article = "<p style='text-align: center'>Originally made by Katherine Crowson (https://github.com/crowsonkb, https://twitter.com/RiversHaveWings). The original BigGAN+CLIP method was by https://twitter.com/advadnoun. Added some explanations and modifications by Eleiber#8347, pooling trick by Crimeacs#8222 (https://twitter.com/EarthML1) and the GUI was made with the help of Abulafia#3734. | <a href='https://colab.research.google.com/drive/1ZAus_gn2RhTZWzOWUpPERNC0Q8OhZRTZ'>Colab</a> | <a href='https://github.com/CompVis/taming-transformers'>Taming Transformers Github Repo</a> | <a href='https://github.com/openai/CLIP'>CLIP Github Repo</a> Special Thanks to BoneAmputee (https://twitter.com/BoneAmputee) for suggestions and advice</p>"
+description = "Gradio demo for VQGAN + CLIP. To use it, simply add your text, or click one of the examples to load them. Read more at the links below. Please click submit only once. Results will show up in under a minute."
+article = "<p style='text-align: center'>Originally made by Katherine Crowson (https://github.com/crowsonkb, https://twitter.com/RiversHaveWings). The original BigGAN+CLIP method was by https://twitter.com/advadnoun. Added some explanations and modifications by Eleiber#8347, pooling trick by Crimeacs#8222 (https://twitter.com/EarthML1) and the GUI was made with the help of Abulafia#3734. | <a href='https://colab.research.google.com/drive/1ZAus_gn2RhTZWzOWUpPERNC0Q8OhZRTZ'>Colab</a> | <a href='https://github.com/CompVis/taming-transformers'>Taming Transformers Github Repo</a> | <a href='https://github.com/openai/CLIP'>CLIP Github Repo</a> | Special thanks to BoneAmputee (https://twitter.com/BoneAmputee) for suggestions and advice</p>"
 gr.Interface(
     throttled_inference,
-    gr.inputs.Textbox(label="Input"),
+    [gr.inputs.Textbox(label="Input"),
+    gr.inputs.Number(default=42, label="seed"),
+    gr.inputs.Slider(minimum=0.1, maximum=0.9, default=0.23, label='step size')
+    ],
     gr.outputs.Image(type="numpy", label="Output"),
     title=title,
     description=description,
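With the inputs now passed as a list, Gradio maps them positionally onto throttled_inference(text, seed, step_size). A minimal, self-contained sketch of that wiring using the same legacy gr.inputs API as the diff; the demo function and Textbox output are stand-ins for app.py's image pipeline:

import gradio as gr

def demo_fn(text, seed, step_size):
    # Stand-in for throttled_inference: shows how the three inputs arrive.
    return f"text={text!r}, seed={seed}, step_size={step_size}"

gr.Interface(
    demo_fn,
    [gr.inputs.Textbox(label="Input"),
     gr.inputs.Number(default=42, label="seed"),
     gr.inputs.Slider(minimum=0.1, maximum=0.9, default=0.23, label="step size")],
    gr.outputs.Textbox(label="Output"),
).launch()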
 