Update pipeline.py
Browse files- pipeline.py +14 -23
pipeline.py
CHANGED
@@ -1044,30 +1044,24 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
|
|
1044 |
added_cond_kwargs=added_cond_kwargs,
|
1045 |
).sample
|
1046 |
|
1047 |
-
#
|
1048 |
if do_classifier_free_guidance:
|
1049 |
noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
|
1050 |
-
|
1051 |
-
|
1052 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1053 |
# set the step index to the current batch
|
1054 |
self.scheduler._step_index = i
|
1055 |
|
1056 |
-
# if context_start + context_size > num_frames: remove the appended frames from the end of the current_context_latents
|
1057 |
-
# if wrap_count > 0:
|
1058 |
-
# # add the ending frames from current_context_latents to the start of the latent_sum
|
1059 |
-
# latent_sum[:, :, 0:wrap_count, :, :] += current_context_latents[:, :, -wrap_count:, :, :]
|
1060 |
-
# # increase the counter for the ending frames
|
1061 |
-
# latent_counter[0:wrap_count] += 1
|
1062 |
-
# # remove the ending frames from current_context_latents
|
1063 |
-
# current_context_latents = current_context_latents[:, :, :-wrap_count, :, :]
|
1064 |
-
|
1065 |
-
# add the current_context_latents back into the latent sum, starting at the current context start
|
1066 |
-
# latent_sum[:, :, current_context_start : current_context_start + context_size, :, :] += current_context_latents
|
1067 |
-
|
1068 |
-
# add one to the counter for each timestep in the context
|
1069 |
-
latent_counter[current_context_start : current_context_start + context_size] += 1
|
1070 |
-
|
1071 |
# perform guidance
|
1072 |
if do_classifier_free_guidance:
|
1073 |
latent_counter = latent_counter.reshape(1, 1, num_frames, 1, 1)
|
@@ -1083,12 +1077,9 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
|
|
1083 |
progress_bar.update()
|
1084 |
if callback is not None and i % callback_steps == 0:
|
1085 |
callback(i, t, None)
|
1086 |
-
|
1087 |
-
|
1088 |
-
# latents = latent_sum / latent_counter
|
1089 |
|
1090 |
# shuffle rotate latent images by step places, wrapping around the last 2 to the start
|
1091 |
-
latents = torch.cat([
|
1092 |
|
1093 |
if output_type == "latent":
|
1094 |
return AnimateDiffPipelineOutput(frames=latents)
|
|
|
1044 |
added_cond_kwargs=added_cond_kwargs,
|
1045 |
).sample
|
1046 |
|
1047 |
+
# sum the noise predictions for the unconditional and text-conditioned noise
|
1048 |
if do_classifier_free_guidance:
|
1049 |
noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
|
1050 |
+
if wrap_count > 0:
|
1051 |
+
# add the ending frames from noise_pred_uncond to the start of the noise_pred_uncond_sum
|
1052 |
+
noise_pred_uncond_sum[:, :, 0:wrap_count, :, :] += noise_pred_uncond[:, :, -wrap_count:, :, :]
|
1053 |
+
noise_pred_text_sum[:, :, 0:wrap_count, :, :] += noise_pred_text[:, :, -wrap_count:, :, :]
|
1054 |
+
# increase the counter for the ending frames
|
1055 |
+
latent_counter[0:wrap_count] += 1
|
1056 |
+
# remove the ending frames from noise_pred_uncond
|
1057 |
+
noise_pred_uncond = noise_pred_uncond[:, :, :-wrap_count, :, :]
|
1058 |
+
noise_pred_text = noise_pred_text[:, :, :-wrap_count, :, :]
|
1059 |
+
noise_pred_uncond_sum[:, :, current_context_start : current_context_start + context_size, :, :] += noise_pred_uncond
|
1060 |
+
noise_pred_text_sum[:, :, current_context_start : current_context_start + context_size, :, :] += noise_pred_text
|
1061 |
+
|
1062 |
# set the step index to the current batch
|
1063 |
self.scheduler._step_index = i
|
1064 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1065 |
# perform guidance
|
1066 |
if do_classifier_free_guidance:
|
1067 |
latent_counter = latent_counter.reshape(1, 1, num_frames, 1, 1)
|
|
|
1077 |
progress_bar.update()
|
1078 |
if callback is not None and i % callback_steps == 0:
|
1079 |
callback(i, t, None)
|
|
|
|
|
|
|
1080 |
|
1081 |
# shuffle rotate latent images by step places, wrapping around the last 2 to the start
|
1082 |
+
latents = torch.cat([current_context_latents[:, :, -step:, :, :], current_context_latents[:, :, :-step, :, :]], dim=2)
|
1083 |
|
1084 |
if output_type == "latent":
|
1085 |
return AnimateDiffPipelineOutput(frames=latents)
|