Spaces:

willsh1997
/

littlefreaksgradio

Running

App Files Community

willsh1997 committed 23 days ago

Commit

075f0c6

verified ·

1 Parent(s): 22b2550

:robot: fix tensor device issue with claude

Browse files

Files changed (1) hide show

app.py +60 -48

app.py CHANGED Viewed

@@ -106,7 +106,6 @@ class customUnClipPipeline(UnCLIPImageVariationPipeline):
     ):
         """
         The call function to the pipeline for generation.
         Args:
             image (`PIL.Image.Image` or `List[PIL.Image.Image]` or `torch.Tensor`):
                 `Image` or tensor representing an image batch to be used as the starting point. If you provide a
@@ -138,7 +137,6 @@ class customUnClipPipeline(UnCLIPImageVariationPipeline):
                 The output format of the generated image. Choose between `PIL.Image` or `np.array`.
             return_dict (`bool`, *optional*, defaults to `True`):
                 Whether or not to return a [`~pipelines.ImagePipelineOutput`] instead of a plain tuple.
         Returns:
             [`~pipelines.ImagePipelineOutput`] or `tuple`:
                 If `return_dict` is `True`, [`~pipelines.ImagePipelineOutput`] is returned, otherwise a `tuple` is
@@ -314,14 +312,11 @@ class customUnClipPipeline(UnCLIPImageVariationPipeline):
 ### ADDITIONAL PIPELINE CODE FOR KARLO
-torch_device = torch.device('cpu')
-pipe = customUnClipPipeline.from_pretrained("kakaobrain/karlo-v1-alpha-image-variations", torch_dtype=torch.float32, trust_remote_code=True,
-                                            # device=torch_device,
-                                            # device_map='cpu'
-                                           )
-pipe.to(torch.device("cuda"))
-# pipe.enable_model_cpu_offload()
 # func for getting tensor embeddings from cand image
@@ -334,27 +329,27 @@ def load_img_from_URL(URL):
     init_image = Image.open(BytesIO(response.content)).convert("RGB")
     return init_image
-def embed_img(input_image):
-    tokens = pipe.feature_extractor(input_image).to(torch_device)
-    img_model = pipe.image_encoder.to(torch_device)
     with torch.no_grad():
-        embeds = img_model(torch.tensor(tokens.pixel_values[0]).unsqueeze(0).to(torch_device))
-    return embeds.image_embeds.to(torch_device)
-def localimg_2_embed(image_dir):
-    embeds = embed_img(load_image(image_dir))
     return embeds
-def URLimg_2_embed(URL):
-    embeds = embed_img(load_img_from_URL(URL))
     return embeds
 # random generator for softmaxxed outputs
-def random_probdist(num_cands):
-    random_numbers = torch.randn(num_cands)
     softmax_output = torch.nn.functional.softmax(random_numbers, dim=0).reshape((num_cands,1))
     return softmax_output
@@ -366,13 +361,15 @@ def scalesum_candtensors(list_scale, cand_tensors):
     assert sum(list_scale) == 1, f"you didn't input a valid probability distribution - make sure your scales add up to 1, currently it adds up to {sum(list_scale)}"
     assert len(list_scale) == len(cand_tensors), f"your scale list is not the same length as your list of candidate tensors. len list = {len(list_scale)}, len cand tensors = {len(cand_tensors)}"
-    scaled = torch.tensor(list_scale), cand_tensors
     output = scaled.sum(dim=0)
     return output
 def random_candtensor(cand_tensors):
-    scaled = random_probdist(len(cand_tensors)) * cand_tensors
     output = scaled.sum(dim=0)
     return output
@@ -390,37 +387,52 @@ def image_grid(imgs, rows, cols):
     return grid
-chaosclicker_willtensor = localimg_2_embed('willpaint-imgs/chaosclicker-willpaint.png').to(torch_device)
-contentcnsr_willtensor = localimg_2_embed('willpaint-imgs/contentconnoisseur-willpaint.png').to(torch_device)
-digdaydrmr_willtensor = localimg_2_embed('willpaint-imgs/digitaldaydreamer-willpaint.png').to(torch_device)
-ecoexplr_willtensor = localimg_2_embed('willpaint-imgs/ecoexplorer-willpaint.png').to(torch_device)
-fandomfox_willtensor = localimg_2_embed('willpaint-imgs/fandomfox-willpaint.png').to(torch_device)
-mememaven_willtensor = localimg_2_embed('willpaint-imgs/mememaven-willpaint.png').to(torch_device)
-newsnerd_willtensor = localimg_2_embed('willpaint-imgs/newnerd-willpaint.png').to(torch_device)
-nostalgicnvgtr_willtensor = localimg_2_embed('willpaint-imgs/nostalgicnavigator-willpaint.png').to(torch_device)
-scrollseeker_willtensor = localimg_2_embed('willpaint-imgs/scrollseeker-willpaint.png').to(torch_device)
-trendtracker_willtensor = localimg_2_embed('willpaint-imgs/trendtracker-willpaint.png').to(torch_device)
-will_cand_tensors = torch.cat([chaosclicker_willtensor,
-                                contentcnsr_willtensor ,
-                                digdaydrmr_willtensor,
-                                ecoexplr_willtensor,
-                                fandomfox_willtensor,
-                                mememaven_willtensor,
-                                newsnerd_willtensor,
-                                nostalgicnvgtr_willtensor,
-                                scrollseeker_willtensor,
-                                trendtracker_willtensor,], dim=0)
 ### FUNCTION FOR EXECUTION
 @spaces.GPU
 def generate_freak():
-    will_randomised_input = random_candtensor(will_cand_tensors).unsqueeze(0)
-    #will_randomised_input
-    output = pipe(image_embeddings=will_randomised_input.to("cuda"), num_images_per_prompt=1, decoder_num_inference_steps = 15, super_res_num_inference_steps = 4)
     return output.images[0]
 ### GRADIO BACKEND

     ):
         """
         The call function to the pipeline for generation.
         Args:
             image (`PIL.Image.Image` or `List[PIL.Image.Image]` or `torch.Tensor`):
                 `Image` or tensor representing an image batch to be used as the starting point. If you provide a
                 The output format of the generated image. Choose between `PIL.Image` or `np.array`.
             return_dict (`bool`, *optional*, defaults to `True`):
                 Whether or not to return a [`~pipelines.ImagePipelineOutput`] instead of a plain tuple.
         Returns:
             [`~pipelines.ImagePipelineOutput`] or `tuple`:
                 If `return_dict` is `True`, [`~pipelines.ImagePipelineOutput`] is returned, otherwise a `tuple` is
 ### ADDITIONAL PIPELINE CODE FOR KARLO
+# Initialize pipeline on CPU first
+pipe = customUnClipPipeline.from_pretrained("kakaobrain/karlo-v1-alpha-image-variations", torch_dtype=torch.float32, trust_remote_code=True)
+# Global variable to store embeddings - will be loaded on GPU when needed
+will_cand_tensors = None
 # func for getting tensor embeddings from cand image
     init_image = Image.open(BytesIO(response.content)).convert("RGB")
     return init_image
+def embed_img(input_image, device):
+    tokens = pipe.feature_extractor(input_image)
+    img_model = pipe.image_encoder.to(device)
     with torch.no_grad():
+        embeds = img_model(torch.tensor(tokens.pixel_values[0]).unsqueeze(0).to(device))
+    return embeds.image_embeds
+def localimg_2_embed(image_dir, device):
+    embeds = embed_img(load_image(image_dir), device)
     return embeds
+def URLimg_2_embed(URL, device):
+    embeds = embed_img(load_img_from_URL(URL), device)
     return embeds
 # random generator for softmaxxed outputs
+def random_probdist(num_cands, device):
+    random_numbers = torch.randn(num_cands, device=device)
     softmax_output = torch.nn.functional.softmax(random_numbers, dim=0).reshape((num_cands,1))
     return softmax_output
     assert sum(list_scale) == 1, f"you didn't input a valid probability distribution - make sure your scales add up to 1, currently it adds up to {sum(list_scale)}"
     assert len(list_scale) == len(cand_tensors), f"your scale list is not the same length as your list of candidate tensors. len list = {len(list_scale)}, len cand tensors = {len(cand_tensors)}"
+    device = cand_tensors.device
+    scaled = torch.tensor(list_scale, device=device).unsqueeze(1) * cand_tensors
     output = scaled.sum(dim=0)
     return output
 def random_candtensor(cand_tensors):
+    device = cand_tensors.device
+    scaled = random_probdist(len(cand_tensors), device) * cand_tensors
     output = scaled.sum(dim=0)
     return output
     return grid
+def initialize_embeddings(device):
+    """Initialize embeddings on the correct device"""
+    global will_cand_tensors
+    if will_cand_tensors is None:
+        # Create embeddings on the specified device
+        chaosclicker_willtensor = localimg_2_embed('willpaint-imgs/chaosclicker-willpaint.png', device)
+        contentcnsr_willtensor = localimg_2_embed('willpaint-imgs/contentconnoisseur-willpaint.png', device)
+        digdaydrmr_willtensor = localimg_2_embed('willpaint-imgs/digitaldaydreamer-willpaint.png', device)
+        ecoexplr_willtensor = localimg_2_embed('willpaint-imgs/ecoexplorer-willpaint.png', device)
+        fandomfox_willtensor = localimg_2_embed('willpaint-imgs/fandomfox-willpaint.png', device)
+        mememaven_willtensor = localimg_2_embed('willpaint-imgs/mememaven-willpaint.png', device)
+        newsnerd_willtensor = localimg_2_embed('willpaint-imgs/newnerd-willpaint.png', device)
+        nostalgicnvgtr_willtensor = localimg_2_embed('willpaint-imgs/nostalgicnavigator-willpaint.png', device)
+        scrollseeker_willtensor = localimg_2_embed('willpaint-imgs/scrollseeker-willpaint.png', device)
+        trendtracker_willtensor = localimg_2_embed('willpaint-imgs/trendtracker-willpaint.png', device)
+        will_cand_tensors = torch.cat([chaosclicker_willtensor,
+                                        contentcnsr_willtensor ,
+                                        digdaydrmr_willtensor,
+                                        ecoexplr_willtensor,
+                                        fandomfox_willtensor,
+                                        mememaven_willtensor,
+                                        newsnerd_willtensor,
+                                        nostalgicnvgtr_willtensor,
+                                        scrollseeker_willtensor,
+                                        trendtracker_willtensor,], dim=0)
+    return will_cand_tensors
 ### FUNCTION FOR EXECUTION
 @spaces.GPU
 def generate_freak():
+    # Move pipeline to GPU
+    device = torch.device("cuda")
+    pipe.to(device)
+    # Initialize embeddings on GPU
+    cand_tensors = initialize_embeddings(device)
+    # Generate random input on GPU
+    will_randomised_input = random_candtensor(cand_tensors).unsqueeze(0)
+    # Generate image
+    output = pipe(image_embeddings=will_randomised_input, num_images_per_prompt=1, decoder_num_inference_steps = 15, super_res_num_inference_steps = 4)
     return output.images[0]
 ### GRADIO BACKEND