Ahsen Khaliq committed on
Commit 2a759eb · 1 Parent(s): bf17e84

Update app.py

Files changed (1)
  1. app.py +21 -12
app.py CHANGED
@@ -6,7 +6,7 @@ import math
 from pathlib import Path
 import sys
 sys.path.insert(1, './taming-transformers')
-#from IPython import display
+# from IPython import display
 from base64 import b64encode
 from omegaconf import OmegaConf
 from PIL import Image
@@ -29,6 +29,11 @@ nvidia_smi.nvmlInit()
 handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)
 # card id 0 hardcoded here, there is also a call to get all available card ids, so we could iterate
 torch.hub.download_url_to_file('https://i.imgur.com/WEHmKef.jpg', 'gpu.jpg')
+
+torch.hub.download_url_to_file('https://images.pexels.com/photos/158028/bellingrath-gardens-alabama-landscape-scenic-158028.jpeg', 'garden.jpeg')
+torch.hub.download_url_to_file('https://images.pexels.com/photos/68767/divers-underwater-ocean-swim-68767.jpeg', 'coralreef.jpeg')
+torch.hub.download_url_to_file('https://images.pexels.com/photos/803975/pexels-photo-803975.jpeg', 'cabin.jpeg')
+
 def sinc(x):
     return torch.where(x != 0, torch.sin(math.pi * x) / (math.pi * x), x.new_ones([]))
 def lanczos(x, a):
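The three Pexels downloads added above fetch the images that the new examples rows at the end of this diff reference as initial images ('garden.jpeg', 'coralreef.jpeg', 'cabin.jpeg'). A small hypothetical guard, not part of this commit, could skip downloads that are already on disk (for example across Space restarts):

    from pathlib import Path
    import torch

    def fetch_example(url, filename):
        # Hypothetical helper, not in app.py: only download the example
        # image if it is not already present on disk.
        if not Path(filename).exists():
            torch.hub.download_url_to_file(url, filename)

    fetch_example('https://images.pexels.com/photos/803975/pexels-photo-803975.jpeg', 'cabin.jpeg')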
@@ -193,9 +198,11 @@ device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
 print('Using device:', device)
 model = load_vqgan_model(args.vqgan_config, args.vqgan_checkpoint).to(device)
 perceptor = clip.load(args.clip_model, jit=False)[0].eval().requires_grad_(False).to(device)
-def inference(text, seed, step_size, max_iterations, width, height):
+def inference(text, seed, step_size, max_iterations, width, height, init_image, init_weight):
     size=[width, height]
     texts = text
+    init_weight=init_weight
+    init_image = init_image.name
     target_images = ""
     max_iterations = max_iterations
     model_names={"vqgan_imagenet_f16_16384": 'ImageNet 16384',"vqgan_imagenet_f16_1024":"ImageNet 1024", 'vqgan_openimages_f16_8192':'OpenImages 8912',
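The new init_image argument comes from the gr.inputs.Image(type="file") component added at the bottom of this diff; with that legacy input type Gradio hands the function a temporary file object, which is why the code reads init_image.name to get a filesystem path. For orientation, the usual VQGAN+CLIP recipe for turning such a path into the starting latent looks roughly like the sketch below; the resize and model.encode steps follow the public VQGAN+CLIP notebooks, not necessarily this exact app:

    from PIL import Image
    import torchvision.transforms.functional as TF

    def encode_init_image(path, width, height, model, device):
        # Load the uploaded file and fit it to the requested output size.
        pil_image = Image.open(path).convert('RGB').resize((width, height), Image.LANCZOS)
        x = TF.to_tensor(pil_image).to(device).unsqueeze(0) * 2 - 1  # map to [-1, 1]
        z, *_ = model.encode(x)  # VQGAN latent used as the optimisation start
        return z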
@@ -293,8 +300,8 @@ def inference(text, seed, step_size, max_iterations, width, height):
         losses_str = ', '.join(f'{loss.item():g}' for loss in losses)
         tqdm.write(f'i: {i}, loss: {sum(losses).item():g}, losses: {losses_str}')
         out = synth(z)
-        #TF.to_pil_image(out[0].cpu()).save('progress.png')
-        #display.display(display.Image('progress.png'))
+        # TF.to_pil_image(out[0].cpu()).save('progress.png')
+        # display.display(display.Image('progress.png'))
         res = nvidia_smi.nvmlDeviceGetUtilizationRates(handle)
         print(f'gpu: {res.gpu}%, gpu-mem: {res.memory}%')
     def ascend_txt():
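The utilisation print reuses the nvidia_smi handle created near the top of the file (card 0 hardcoded). Isolated from the rest of the loop, the monitoring pattern is just the following; the calls are exactly the ones app.py already makes:

    import nvidia_smi

    nvidia_smi.nvmlInit()
    handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)  # GPU 0, as in app.py
    res = nvidia_smi.nvmlDeviceGetUtilizationRates(handle)
    print(f'gpu: {res.gpu}%, gpu-mem: {res.memory}%')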
@@ -303,9 +310,9 @@ def inference(text, seed, step_size, max_iterations, width, height):
         iii = perceptor.encode_image(normalize(make_cutouts(out))).float()
 
         result = []
-        if args.init_weight:
-            # result.append(F.mse_loss(z, z_orig) * args.init_weight / 2)
-            result.append(F.mse_loss(z, torch.zeros_like(z_orig)) * ((1/torch.tensor(i*2 + 1))*args.init_weight) / 2)
+        if init_weight:
+            # result.append(F.mse_loss(z, z_orig) * init_weight / 2)
+            result.append(F.mse_loss(z, torch.zeros_like(z_orig)) * ((1/torch.tensor(i*2 + 1))*init_weight) / 2)
         for prompt in pMs:
             result.append(prompt(iii))
         img = np.array(out.mul(255).clamp(0, 255)[0].cpu().detach().numpy().astype(np.uint8))[:,:,:]
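When init_weight is non-zero, the extra loss term pulls z toward a zero tensor shaped like z_orig (the commented-out line is the older variant that pulled toward z_orig itself), and its weight decays over iterations: the factor works out to init_weight / (2 * (2*i + 1)), i.e. init_weight / 2 at step 0, shrinking roughly as 1/i. A quick illustrative check of that schedule, using the slider maximum of 15.0 as an example value:

    init_weight = 15.0  # example value only; the UI slider ranges 0.0 to 15.0
    for i in (0, 1, 10, 100):
        print(i, init_weight / (2 * (2 * i + 1)))
    # 0 -> 7.5, 1 -> 2.5, 10 -> ~0.36, 100 -> ~0.037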
@@ -342,7 +349,7 @@ def load_image( infilename ) :
     img.load()
     data = np.asarray( img, dtype="int32" )
     return data
-def throttled_inference(text, seed, step_size, max_iterations, width, height):
+def throttled_inference(text, seed, step_size, max_iterations, width, height, init_image, init_weight):
     global inferences_running
     current = inferences_running
     if current >= 3:
@@ -351,7 +358,7 @@ def throttled_inference(text, seed, step_size, max_iterations, width, height):
         print(f"Inference starting when we already had {current} running")
     inferences_running += 1
     try:
-        return inference(text, seed, step_size, max_iterations, width, height)
+        return inference(text, seed, step_size, max_iterations, width, height, init_image, init_weight)
     finally:
         print("Inference finished")
         inferences_running -= 1
@@ -366,14 +373,16 @@ gr.Interface(
         gr.inputs.Slider(minimum=25, maximum=150, default=80, label='max iterations', step=1),
         gr.inputs.Slider(minimum=200, maximum=280, default=256, label='width', step=1),
         gr.inputs.Slider(minimum=200, maximum=280, default=256, label='height', step=1),
+        gr.inputs.Image(type="file", label="Initial Image"),
+        gr.inputs.Slider(minimum=0.0, maximum=15.0, default=0.0, label='Initial Weight', step=1.0),
     ],
     gr.outputs.Image(type="numpy", label="Output"),
     title=title,
     description=description,
     article=article,
     examples=[
-        ['a garden by james gurney',42,0.16, 100, 256, 256],
-        ['coral reef city artstationHQ',1000,0.6, 110, 200, 200],
-        ['a cabin in the mountains unreal engine',98,0.3, 120, 280, 280]
+        ['a garden by james gurney',42,0.16, 100, 256, 256, 'garden.jpeg', 0.0],
+        ['coral reef city artstationHQ',1000,0.6, 110, 200, 200, 'coralreef.jpeg', 0.0],
+        ['a cabin in the mountains unreal engine',98,0.3, 120, 280, 280, 'cabin.jpeg', 0.0]
     ]
 ).launch(debug=True)
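Gradio matches the inputs list to the wrapped function positionally, so the two new components sit in the same positions as the new init_image and init_weight parameters of throttled_inference, and every examples row gains two trailing entries (a downloaded example image path and an initial weight). A stripped-down sketch of the same wiring against the legacy gradio 2.x inputs/outputs API used here; the toy function and the Textbox output are placeholders, not code from this app:

    import gradio as gr

    def fn(text, width, height, init_image, init_weight):
        # Arguments arrive in the same order as the components below.
        path = init_image.name if init_image is not None else None
        return f"{text} at {width}x{height}, init={path}, weight={init_weight}"

    gr.Interface(
        fn,
        [
            gr.inputs.Textbox(label='text'),
            gr.inputs.Slider(minimum=200, maximum=280, default=256, label='width', step=1),
            gr.inputs.Slider(minimum=200, maximum=280, default=256, label='height', step=1),
            gr.inputs.Image(type="file", label="Initial Image"),
            gr.inputs.Slider(minimum=0.0, maximum=15.0, default=0.0, label='Initial Weight', step=1.0),
        ],
        gr.outputs.Textbox(label="Output"),
        examples=[['a garden by james gurney', 256, 256, 'garden.jpeg', 0.0]],
    ).launch()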
 