Update pipeline.py
Browse files- pipeline.py +0 -13
pipeline.py
CHANGED
@@ -956,20 +956,16 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
|
|
956 |
|
957 |
# divide the initial latents into context groups
|
958 |
num_context_groups = num_frames // (context_size-overlap)
|
959 |
-
print(f"Num context groups: {num_context_groups}")
|
960 |
|
961 |
# Denoising loop
|
962 |
num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order
|
963 |
with self.progress_bar(total=len(timesteps)) as progress_bar:
|
964 |
for i, t in enumerate(timesteps):
|
965 |
-
print(f"Step: {i}")
|
966 |
-
print(f"Timestep: {t}")
|
967 |
latent_sum = torch.zeros_like(latents).to(device).to(dtype=torch.float16)
|
968 |
latent_counter = torch.zeros(num_frames).to(device).to(dtype=torch.float16)
|
969 |
|
970 |
# for each context group, separately denoise the current timestep
|
971 |
for context_group in range(num_context_groups):
|
972 |
-
print(f"Context group: {context_group}")
|
973 |
# calculate the current indexes, considering overlap
|
974 |
if context_group == 0:current_context_start = 0
|
975 |
else:current_context_start = context_group * (context_size - overlap)
|
@@ -978,7 +974,6 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
|
|
978 |
current_context_latents = latents[:, :, current_context_start : current_context_start + context_size, :, :]
|
979 |
|
980 |
wrap_count = max(current_context_start + context_size - num_frames, 0)
|
981 |
-
print(f"Wrap count: {wrap_count}")
|
982 |
|
983 |
# if context_start + context_size > num_frames: append the remaining frames from the start of the latents
|
984 |
if wrap_count > 0:
|
@@ -1009,14 +1004,6 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
|
|
1009 |
# compute the previous noisy sample x_t -> x_t-1
|
1010 |
current_context_latents = self.scheduler.step(noise_pred, t, current_context_latents, **extra_step_kwargs).prev_sample
|
1011 |
|
1012 |
-
# remove the appended frames from the end of the current_context_latents
|
1013 |
-
# if wrap_count > 0:
|
1014 |
-
# # remove the ending frames from current_context_latents
|
1015 |
-
# current_context_latents = current_context_latents[:, :, :-wrap_count, :, :]
|
1016 |
-
# # remove the ending frames from noise_pred
|
1017 |
-
# noise_pred = noise_pred[:, :, :-wrap_count, :, :]
|
1018 |
-
# # print the shape of the current_context_latents and noise_pred
|
1019 |
-
|
1020 |
# if context_start + context_size > num_frames: remove the appended frames from the end of the current_context_latents
|
1021 |
if wrap_count > 0:
|
1022 |
# add the ending frames from current_context_latents to the start of the latent_sum
|
|
|
956 |
|
957 |
# divide the initial latents into context groups
|
958 |
num_context_groups = num_frames // (context_size-overlap)
|
|
|
959 |
|
960 |
# Denoising loop
|
961 |
num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order
|
962 |
with self.progress_bar(total=len(timesteps)) as progress_bar:
|
963 |
for i, t in enumerate(timesteps):
|
|
|
|
|
964 |
latent_sum = torch.zeros_like(latents).to(device).to(dtype=torch.float16)
|
965 |
latent_counter = torch.zeros(num_frames).to(device).to(dtype=torch.float16)
|
966 |
|
967 |
# for each context group, separately denoise the current timestep
|
968 |
for context_group in range(num_context_groups):
|
|
|
969 |
# calculate the current indexes, considering overlap
|
970 |
if context_group == 0:current_context_start = 0
|
971 |
else:current_context_start = context_group * (context_size - overlap)
|
|
|
974 |
current_context_latents = latents[:, :, current_context_start : current_context_start + context_size, :, :]
|
975 |
|
976 |
wrap_count = max(current_context_start + context_size - num_frames, 0)
|
|
|
977 |
|
978 |
# if context_start + context_size > num_frames: append the remaining frames from the start of the latents
|
979 |
if wrap_count > 0:
|
|
|
1004 |
# compute the previous noisy sample x_t -> x_t-1
|
1005 |
current_context_latents = self.scheduler.step(noise_pred, t, current_context_latents, **extra_step_kwargs).prev_sample
|
1006 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1007 |
# if context_start + context_size > num_frames: remove the appended frames from the end of the current_context_latents
|
1008 |
if wrap_count > 0:
|
1009 |
# add the ending frames from current_context_latents to the start of the latent_sum
|