Update pipeline.py
Browse files- pipeline.py +0 -19
pipeline.py
CHANGED
@@ -1094,10 +1094,6 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
|
|
1094 |
noise_pred_uncond_sum = torch.zeros_like(latents).to(device).to(dtype=torch.float16)
|
1095 |
noise_pred_text_sum = torch.zeros_like(latents).to(device).to(dtype=torch.float16)
|
1096 |
latent_counter = torch.zeros(num_frames).to(device).to(dtype=torch.float16)
|
1097 |
-
max_sum = 0
|
1098 |
-
min_sum = 0
|
1099 |
-
max_sum_cond = 0
|
1100 |
-
min_sum_cond = 0
|
1101 |
|
1102 |
# for each context group, separately denoise the current timestep
|
1103 |
for context_group in range(len(context_indexes[i])):
|
@@ -1127,11 +1123,6 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
|
|
1127 |
# add the ending frames from noise_pred_uncond to the start of the noise_pred_uncond_sum
|
1128 |
noise_pred_uncond_sum[:, :,current_context_indexes, :, :] += noise_pred_uncond
|
1129 |
noise_pred_text_sum[:, :,current_context_indexes, :, :] += noise_pred_text
|
1130 |
-
# track the average min and max for normalization
|
1131 |
-
max_sum += noise_pred_uncond.max()
|
1132 |
-
min_sum += noise_pred_uncond.min()
|
1133 |
-
max_sum_cond += noise_pred_text.max()
|
1134 |
-
min_sum_cond += noise_pred_text.min()
|
1135 |
#increase the counter for the ending frames
|
1136 |
latent_counter[current_context_indexes] += 1
|
1137 |
|
@@ -1147,16 +1138,6 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
|
|
1147 |
noise_pred_uncond = noise_pred_uncond_sum / latent_counter
|
1148 |
noise_pred_text = noise_pred_text_sum / latent_counter
|
1149 |
|
1150 |
-
# calculate the average min and max for normalization
|
1151 |
-
avg_max = max_sum / latent_counter.sum()
|
1152 |
-
avg_min = min_sum / latent_counter.sum()
|
1153 |
-
avg_max_cond = max_sum_cond / latent_counter.sum()
|
1154 |
-
avg_min_cond = min_sum_cond / latent_counter.sum()
|
1155 |
-
|
1156 |
-
# scale the noise predictions to the range of the avg min and max
|
1157 |
-
noise_pred_uncond = (noise_pred_uncond - avg_min) / (avg_max - avg_min)
|
1158 |
-
noise_pred_text = (noise_pred_text - avg_min_cond) / (avg_max_cond - avg_min_cond)
|
1159 |
-
|
1160 |
noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
|
1161 |
|
1162 |
# print min and max
|
|
|
1094 |
noise_pred_uncond_sum = torch.zeros_like(latents).to(device).to(dtype=torch.float16)
|
1095 |
noise_pred_text_sum = torch.zeros_like(latents).to(device).to(dtype=torch.float16)
|
1096 |
latent_counter = torch.zeros(num_frames).to(device).to(dtype=torch.float16)
|
|
|
|
|
|
|
|
|
1097 |
|
1098 |
# for each context group, separately denoise the current timestep
|
1099 |
for context_group in range(len(context_indexes[i])):
|
|
|
1123 |
# add the ending frames from noise_pred_uncond to the start of the noise_pred_uncond_sum
|
1124 |
noise_pred_uncond_sum[:, :,current_context_indexes, :, :] += noise_pred_uncond
|
1125 |
noise_pred_text_sum[:, :,current_context_indexes, :, :] += noise_pred_text
|
|
|
|
|
|
|
|
|
|
|
1126 |
#increase the counter for the ending frames
|
1127 |
latent_counter[current_context_indexes] += 1
|
1128 |
|
|
|
1138 |
noise_pred_uncond = noise_pred_uncond_sum / latent_counter
|
1139 |
noise_pred_text = noise_pred_text_sum / latent_counter
|
1140 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1141 |
noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
|
1142 |
|
1143 |
# print min and max
|