smoothieAI
/

pipeline_animatediff_context_controlnet

Model card Files Files and versions Community

smoothieAI committed on Jan 28, 2024

Commit

4b1ffbf

verified ·

1 Parent(s): 57f4540

Update pipeline.py

Browse files

Files changed (1) hide show

pipeline.py +0 -33

pipeline.py CHANGED Viewed

@@ -982,7 +982,6 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
         image = self.control_image_processor.preprocess(image, height=height, width=width).to(dtype=torch.float32)
         # image_batch_size = image.shape[0]
         image_batch_size = len(image)
-        print("prepared control image_batch_size", image_batch_size)
         # if image_batch_size == 1:
         #     repeat_by = batch_size
@@ -996,9 +995,6 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
         # if do_classifier_free_guidance and not guess_mode:
         #     image = torch.cat([image] * 2)
-        print("prepared control image_batch_size", image.shape)
-        print("prepared control device", image.device)
         return image
@@ -1258,7 +1254,6 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
         # round num frames to the nearest multiple of context size - overlap
         num_frames = (num_frames // (context_size - overlap)) * (context_size - overlap)
-        print(f"Num frames: {num_frames}")
         # 5. Prepare latent variables
         num_channels_latents = self.unet.config.in_channels
@@ -1408,15 +1403,12 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
         num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order
         with self.progress_bar(total=len(timesteps)) as progress_bar:
             for i, t in enumerate(timesteps):
-                print("i", i)
-                print("t", t)
                 noise_pred_uncond_sum = torch.zeros_like(latents).to(device).to(dtype=torch.float16)
                 noise_pred_text_sum = torch.zeros_like(latents).to(device).to(dtype=torch.float16)
                 latent_counter = torch.zeros(num_frames).to(device).to(dtype=torch.float16)
                 # for each context group, separately denoise the current timestep
                 for context_group in range(len(context_indexes[i])):
-                    print("Denoising context group", context_group, "of", len(context_indexes[i]))
                     # calculate the current indexes, considering overlap
                     current_context_indexes = context_indexes[i][context_group]
@@ -1428,10 +1420,6 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
                     latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
                     control_end_step = int(control_end*num_inference_steps)
-                    print(i, control_end_step)
-                    print("control_end_step", control_end_step)
-                    if self.controlnet != None and i < control_end_step:
-                        print("adding controlnet")
                     if self.controlnet != None and i < int(control_end*num_inference_steps):
@@ -1477,9 +1465,6 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
                             return_dict=False,
                         )
-                        torch.cuda.synchronize()  # Synchronize GPU
-                        print("controlnet time", time.time() - control_start)
-                        torch.cuda.synchronize()
                         unet_start = time.time()
                         # predict the noise residual with the added controlnet residuals
                         noise_pred = self.unet(
@@ -1491,8 +1476,6 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
                             down_block_additional_residuals=down_block_res_samples,
                             mid_block_additional_residual=mid_block_res_sample,
                         ).sample
-                        torch.cuda.synchronize()
-                        print("unet time", time.time() - unet_start)
                     else:
                         # predict the noise residual without controlnet
@@ -1505,9 +1488,6 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
                             cross_attention_kwargs=cross_attention_kwargs,
                             added_cond_kwargs=added_cond_kwargs,
                         ).sample
-                        torch.cuda.synchronize()
-                        print("unet time", time.time() - unet_start)
                     if do_classifier_free_guidance:
                         # Start timing for overall guidance process
@@ -1520,10 +1500,6 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
                         noise_pred_uncond, noise_pred_text = torch.chunk(noise_pred, 2, dim=0)
-                        torch.cuda.synchronize()  # Synchronize GPU after chunking
-                        time_chunk_end = time.time()
-                        print("Chunk time: {:.6f} seconds".format(time_chunk_end - time_chunk_start))
                         # Timing for batch addition and latent counter increment
                         torch.cuda.synchronize()  # Synchronize GPU before batch addition
                         time_batch_addition_start = time.time()
@@ -1533,15 +1509,6 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
                         noise_pred_text_sum[..., current_context_indexes, :, :] += noise_pred_text
                         latent_counter[current_context_indexes] += 1
-                        torch.cuda.synchronize()  # Synchronize GPU after batch addition
-                        time_batch_addition_end = time.time()
-                        print("Batch addition and counter increment time: {:.6f} seconds".format(time_batch_addition_end - time_batch_addition_start))
-                        # End timing for overall guidance process
-                        torch.cuda.synchronize()  # Synchronize GPU after overall guidance process
-                        end_guidance_time = time.time()
-                        print("Total guidance time: {:.6f} seconds".format(end_guidance_time - start_guidance_time))
                     # set the step index to the current batch
                     self.scheduler._step_index = i

         image = self.control_image_processor.preprocess(image, height=height, width=width).to(dtype=torch.float32)
         # image_batch_size = image.shape[0]
         image_batch_size = len(image)
         # if image_batch_size == 1:
         #     repeat_by = batch_size
         # if do_classifier_free_guidance and not guess_mode:
         #     image = torch.cat([image] * 2)
         return image
         # round num frames to the nearest multiple of context size - overlap
         num_frames = (num_frames // (context_size - overlap)) * (context_size - overlap)
         # 5. Prepare latent variables
         num_channels_latents = self.unet.config.in_channels
         num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order
         with self.progress_bar(total=len(timesteps)) as progress_bar:
             for i, t in enumerate(timesteps):
                 noise_pred_uncond_sum = torch.zeros_like(latents).to(device).to(dtype=torch.float16)
                 noise_pred_text_sum = torch.zeros_like(latents).to(device).to(dtype=torch.float16)
                 latent_counter = torch.zeros(num_frames).to(device).to(dtype=torch.float16)
                 # for each context group, separately denoise the current timestep
                 for context_group in range(len(context_indexes[i])):
                     # calculate the current indexes, considering overlap
                     current_context_indexes = context_indexes[i][context_group]
                     latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
                     control_end_step = int(control_end*num_inference_steps)
                     if self.controlnet != None and i < int(control_end*num_inference_steps):
                             return_dict=False,
                         )
                         unet_start = time.time()
                         # predict the noise residual with the added controlnet residuals
                         noise_pred = self.unet(
                             down_block_additional_residuals=down_block_res_samples,
                             mid_block_additional_residual=mid_block_res_sample,
                         ).sample
                     else:
                         # predict the noise residual without controlnet
                             cross_attention_kwargs=cross_attention_kwargs,
                             added_cond_kwargs=added_cond_kwargs,
                         ).sample
                     if do_classifier_free_guidance:
                         # Start timing for overall guidance process
                         noise_pred_uncond, noise_pred_text = torch.chunk(noise_pred, 2, dim=0)
                         # Timing for batch addition and latent counter increment
                         torch.cuda.synchronize()  # Synchronize GPU before batch addition
                         time_batch_addition_start = time.time()
                         noise_pred_text_sum[..., current_context_indexes, :, :] += noise_pred_text
                         latent_counter[current_context_indexes] += 1
                     # set the step index to the current batch
                     self.scheduler._step_index = i