tolgacangoz committed
Commit 33a8121 · verified · 1 Parent(s): 87ecd11

Upload matryoshka.py

Files changed (1)
  1. matryoshka.py +39 -26
matryoshka.py CHANGED
@@ -19,8 +19,8 @@
 # Adapted to Diffusers by [M. Tolga Cangöz](https://github.com/tolgacangoz).
 
 
-import inspect
 import gc
+import inspect
 import math
 from dataclasses import dataclass
 from typing import Any, Callable, Dict, List, Optional, Tuple, Union
@@ -633,7 +633,10 @@ class MatryoshkaDDIMScheduler(SchedulerMixin, ConfigMixin):
         # 4. Clip or threshold "predicted x_0"
         if self.config.thresholding:
             if len(model_output) > 1:
-                pred_original_sample = [self._threshold_sample(p_o_s * scale) / scale for p_o_s, scale in zip(pred_original_sample, self.scales)]
+                pred_original_sample = [
+                    self._threshold_sample(p_o_s * scale) / scale
+                    for p_o_s, scale in zip(pred_original_sample, self.scales)
+                ]
             else:
                 pred_original_sample = self._threshold_sample(pred_original_sample)
         elif self.config.clip_sample:
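
This hunk only reflows the per-level thresholding: each nested prediction is scaled up, thresholded, then scaled back down. A minimal sketch of that idea, using a hypothetical `threshold_sample` stand-in for the scheduler's `_threshold_sample` (which does dynamic, quantile-based thresholding); the shapes and scale values below are illustrative only:

```python
import torch

def threshold_sample(sample: torch.Tensor, quantile: float = 0.995, max_value: float = 1.0) -> torch.Tensor:
    # Hypothetical stand-in for the scheduler's _threshold_sample (dynamic thresholding):
    # clamp each sample to a per-batch range derived from a quantile of |x|, then rescale.
    batch = sample.shape[0]
    abs_flat = sample.reshape(batch, -1).abs()
    s = torch.quantile(abs_flat, quantile, dim=1).clamp(min=max_value)
    s = s.view(batch, *([1] * (sample.ndim - 1)))
    return sample.clamp(-s, s) / s * max_value

# Nested predictions, highest resolution first, with per-level scales as in scheduler.scales.
pred_original_sample = [torch.randn(1, 3, 256, 256), torch.randn(1, 3, 64, 64)]
scales = [4.0, 1.0]  # illustrative values only

# Same computation as the new list comprehension: threshold in the scaled space,
# then divide the scale back out.
pred_original_sample = [
    threshold_sample(p_o_s * scale) / scale
    for p_o_s, scale in zip(pred_original_sample, scales)
]
print([p.abs().max().item() for p in pred_original_sample])
```
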
@@ -3777,14 +3780,17 @@ class MatryoshkaPipeline(
         super().__init__()
 
         if nesting_level == 0:
-            unet = MatryoshkaUNet2DConditionModel.from_pretrained("tolgacangoz/matryoshka-diffusion-models",
-                                                                  subfolder="unet/nesting_level_0")
+            unet = MatryoshkaUNet2DConditionModel.from_pretrained(
+                "tolgacangoz/matryoshka-diffusion-models", subfolder="unet/nesting_level_0"
+            )
         elif nesting_level == 1:
-            unet = NestedUNet2DConditionModel.from_pretrained("tolgacangoz/matryoshka-diffusion-models",
-                                                              subfolder="unet/nesting_level_1")
+            unet = NestedUNet2DConditionModel.from_pretrained(
+                "tolgacangoz/matryoshka-diffusion-models", subfolder="unet/nesting_level_1"
+            )
         elif nesting_level == 2:
-            unet = NestedUNet2DConditionModel.from_pretrained("tolgacangoz/matryoshka-diffusion-models",
-                                                              subfolder="unet/nesting_level_2")
+            unet = NestedUNet2DConditionModel.from_pretrained(
+                "tolgacangoz/matryoshka-diffusion-models", subfolder="unet/nesting_level_2"
+            )
         else:
             raise ValueError("Currently, nesting levels 0, 1, and 2 are supported.")
 
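For reference, the constructor resolves `nesting_level` to a UNet checkpoint under `unet/nesting_level_{0,1,2}` in the `tolgacangoz/matryoshka-diffusion-models` repo. A minimal usage sketch, assuming the file is consumed as the `matryoshka` community pipeline; the pipeline id, prompt, and step count are assumptions, not part of this diff:

```python
import torch
from diffusers import DiffusionPipeline

# nesting_level picks the UNet: 0 -> plain MatryoshkaUNet2DConditionModel,
# 1 or 2 -> NestedUNet2DConditionModel; other values raise the ValueError above.
pipe = DiffusionPipeline.from_pretrained(
    "tolgacangoz/matryoshka-diffusion-models",
    custom_pipeline="matryoshka",  # assumed community pipeline id
    nesting_level=2,
    torch_dtype=torch.float16,
).to("cuda")

out = pipe("a dog catching a frisbee in the park", num_inference_steps=50)
for i, im in enumerate(out.images):  # nested levels return one image per resolution
    im.save(f"sample_{i}.png")
```
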
@@ -3854,17 +3860,20 @@ class MatryoshkaPipeline(
         if nesting_level == 0:
             if hasattr(self.unet, "nest_ratio"):
                 self.scheduler.scales = None
-            self.unet = MatryoshkaUNet2DConditionModel.from_pretrained("tolgacangoz/matryoshka-diffusion-models",
-                                                                       subfolder="unet/nesting_level_0").to(self.device)
+            self.unet = MatryoshkaUNet2DConditionModel.from_pretrained(
+                "tolgacangoz/matryoshka-diffusion-models", subfolder="unet/nesting_level_0"
+            ).to(self.device)
             self.config.nesting_level = 0
         elif nesting_level == 1:
-            self.unet = NestedUNet2DConditionModel.from_pretrained("tolgacangoz/matryoshka-diffusion-models",
-                                                                   subfolder="unet/nesting_level_1").to(self.device)
+            self.unet = NestedUNet2DConditionModel.from_pretrained(
+                "tolgacangoz/matryoshka-diffusion-models", subfolder="unet/nesting_level_1"
+            ).to(self.device)
             self.config.nesting_level = 1
             self.scheduler.scales = self.unet.nest_ratio + [1]
         elif nesting_level == 2:
-            self.unet = NestedUNet2DConditionModel.from_pretrained("tolgacangoz/matryoshka-diffusion-models",
-                                                                   subfolder="unet/nesting_level_2").to(self.device)
+            self.unet = NestedUNet2DConditionModel.from_pretrained(
+                "tolgacangoz/matryoshka-diffusion-models", subfolder="unet/nesting_level_2"
+            ).to(self.device)
             self.config.nesting_level = 2
             self.scheduler.scales = self.unet.nest_ratio + [1]
         else:
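
This hunk likewise only rewraps the `from_pretrained` calls inside the nesting-level switch; the bookkeeping is unchanged: the plain UNet clears `scheduler.scales`, the nested UNets set it to `nest_ratio + [1]`. A tiny sketch of that rule (the nest ratios are illustrative, not read from the checkpoints):

```python
from typing import List, Optional

def scales_for(nest_ratio: Optional[List[int]]) -> Optional[List[int]]:
    # None for the single-resolution UNet; nested UNets append 1 for the innermost level.
    return None if nest_ratio is None else nest_ratio + [1]

print(scales_for(None))    # nesting_level == 0 -> None
print(scales_for([2]))     # e.g. nesting_level == 1 -> [2, 1]
print(scales_for([4, 2]))  # e.g. nesting_level == 2 -> [4, 2, 1]
```
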
@@ -4030,7 +4039,9 @@ class MatryoshkaPipeline(
             prompt_attention_mask = torch.cat(
                 [
                     prompt_attention_mask,
-                    torch.zeros(batch_size, max_len - len(prompt_attention_mask[0]), dtype=torch.long, device=device),
+                    torch.zeros(
+                        batch_size, max_len - len(prompt_attention_mask[0]), dtype=torch.long, device=device
+                    ),
                 ],
                 dim=1,
             )
@@ -4042,7 +4053,12 @@ class MatryoshkaPipeline(
             negative_prompt_attention_mask = torch.cat(
                 [
                     negative_prompt_attention_mask,
-                    torch.zeros(batch_size, max_len - len(negative_prompt_attention_mask[0]), dtype=torch.long, device=device),
+                    torch.zeros(
+                        batch_size,
+                        max_len - len(negative_prompt_attention_mask[0]),
+                        dtype=torch.long,
+                        device=device,
+                    ),
                 ],
                 dim=1,
             )
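
Both attention-mask hunks above are pure reflows of the same padding step: the mask is right-padded with zeros along the sequence dimension up to `max_len`. A self-contained sketch of that operation with toy values:

```python
import torch

batch_size, max_len, device = 2, 8, "cpu"

# Toy mask shorter than max_len (1 = real token, 0 = padding).
prompt_attention_mask = torch.ones(batch_size, 5, dtype=torch.long, device=device)

prompt_attention_mask = torch.cat(
    [
        prompt_attention_mask,
        torch.zeros(batch_size, max_len - len(prompt_attention_mask[0]), dtype=torch.long, device=device),
    ],
    dim=1,
)
print(prompt_attention_mask)  # each row: five 1s followed by three 0s
```
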
@@ -4533,7 +4549,6 @@ class MatryoshkaPipeline(
             self.do_classifier_free_guidance,
         )
 
-
         # 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
         extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta)
         # 4. Prepare timesteps
@@ -4656,17 +4671,14 @@ class MatryoshkaPipeline(
             image = latents
 
         if self.scheduler.scales is not None:
-            scales = [
-                image[i].size(-1) / image[-1].size(-1)
-                for i in range(len(image))
-            ]
+            scales = [image[i].size(-1) / image[-1].size(-1) for i in range(len(image))]
             for i, (img, scale) in enumerate(zip(image, scales)):
                 img = torch.clip(img * scale, -1, 1)
-                img = torch.clamp(img * 0.5 + 0.5, min=0, max=1).cpu()
-                img = img.squeeze(0).permute(1, 2, 0).numpy()
+                # img = torch.clamp(img * 0.5 + 0.5, min=0, max=1).cpu()
+                # img = img.squeeze(0).permute(1, 2, 0).numpy()
                 # img = self.image_processor.pt_to_numpy(img)
-                image[i] = numpy_to_pil(img)[0]
-                # image[i] = self.image_processor.postprocess(img * scale, output_type=output_type)[0]
+                # image[i] = numpy_to_pil(img)[0]
+                image[i] = self.image_processor.postprocess(img, output_type=output_type)[0]
         else:
             image = self.image_processor.postprocess(image, output_type=output_type)
 
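This is the one behavioral change in the commit: instead of hand-rolling the clamp/permute/`numpy_to_pil` conversion for each nested output, every per-scale image now goes through `image_processor.postprocess`, so `output_type` is respected. A hedged sketch of the new path, with random tensors standing in for the decoded outputs and illustrative sizes and scales:

```python
import torch
from diffusers.image_processor import VaeImageProcessor

image_processor = VaeImageProcessor(do_resize=False)
output_type = "pil"

# Stand-ins for nested outputs in [-1, 1]; highest resolution first, as the
# scale computation below implies (image[-1] is the smallest).
image = [torch.rand(1, 3, 256, 256) * 2 - 1, torch.rand(1, 3, 64, 64) * 2 - 1]

scales = [image[i].size(-1) / image[-1].size(-1) for i in range(len(image))]  # [4.0, 1.0]

for i, (img, scale) in enumerate(zip(image, scales)):
    img = torch.clip(img * scale, -1, 1)
    # postprocess maps [-1, 1] back to [0, 1] and converts to the requested output_type.
    image[i] = image_processor.postprocess(img, output_type=output_type)[0]

print([im.size for im in image])  # [(256, 256), (64, 64)]
```
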
@@ -4678,6 +4690,7 @@ class MatryoshkaPipeline(
 
         return MatryoshkaPipelineOutput(images=image)
 
+
 def numpy_to_pil(images: np.ndarray) -> List[Image.Image]:
     """
     Convert a numpy image or a batch of images to a PIL image.
@@ -4691,4 +4704,4 @@ def numpy_to_pil(images: np.ndarray) -> List[Image.Image]:
     else:
         pil_images = [Image.fromarray(image) for image in images]
 
-    return pil_images
+    return pil_images
 