Update pipeline.py
pipeline.py (+4, -1)
@@ -1417,6 +1417,7 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
                 # foreach context group seperately denoise the current timestep
                 for context_group in range(len(context_indexes[i])):
                     # calculate to current indexes, considering overlap
+                    prep_time = time.time()
                     current_context_indexes = context_indexes[i][context_group]

                     # select the relevent context from the latents
@@ -1425,11 +1426,11 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
                     # expand the latents if we are doing classifier free guidance
                     latent_model_input = torch.cat([current_context_latents] * 2) if do_classifier_free_guidance else current_context_latents
                     latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
+                    print("prep time", time.time() - prep_time)


                     if self.controlnet != None and i < int(control_end*len(timesteps)):
                         contorl_start = time.time()
-
                         current_context_conditioning_frames = conditioning_frames[current_context_indexes, :, :, :]
                         current_context_conditioning_frames = torch.cat([current_context_conditioning_frames] * 2) if do_classifier_free_guidance else current_context_conditioning_frames

@@ -1496,6 +1497,7 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
                     print("unet time", time.time() - unet_start)

                     # sum the noise predictions for the unconditional and text conditioned noise
+                    start_guidance_time = time.time()
                     if do_classifier_free_guidance:
                         noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)

@@ -1504,6 +1506,7 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
                         noise_pred_text_sum[:, :,current_context_indexes, :, :] += noise_pred_text
                     #increase the counter for the ending frames
                     latent_counter[current_context_indexes] += 1
+                    print("guidance time", time.time() - start_guidance_time)

                     # set the step index to the current batch
                     self.scheduler._step_index = i
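The timings added in this commit wrap GPU-side work with bare time.time() calls. Because CUDA kernels launch asynchronously, such prints can reflect launch overhead rather than the actual compute time. A minimal, hypothetical sketch of a timing helper that synchronizes around the measured block is shown below; the timed name and labels are illustrative and not part of the pipeline.

import time
from contextlib import contextmanager

import torch


@contextmanager
def timed(label, sync_cuda=True):
    # CUDA kernels are launched asynchronously, so synchronize before and
    # after the block to measure the GPU work itself rather than only the
    # launch overhead. (This helper is a sketch, not pipeline code.)
    if sync_cuda and torch.cuda.is_available():
        torch.cuda.synchronize()
    start = time.perf_counter()
    try:
        yield
    finally:
        if sync_cuda and torch.cuda.is_available():
            torch.cuda.synchronize()
        print(label, time.perf_counter() - start)


# Example usage mirroring the prints added in this commit:
# with timed("prep time"):
#     ...  # build latent_model_input for the current context group
# with timed("guidance time"):
#     ...  # classifier-free guidance and noise accumulation

The same pattern could also cover the existing contorl_start/unet_start measurements if these timings are ever kept beyond debugging.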