smoothieAI committed on
Commit
1ee3c52
·
verified ·
1 Parent(s): 788e3dc

Update pipeline.py

Browse files
Files changed (1) hide show
  1. pipeline.py +20 -0
pipeline.py CHANGED
@@ -1094,6 +1094,10 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
1094
  noise_pred_uncond_sum = torch.zeros_like(latents).to(device).to(dtype=torch.float16)
1095
  noise_pred_text_sum = torch.zeros_like(latents).to(device).to(dtype=torch.float16)
1096
  latent_counter = torch.zeros(num_frames).to(device).to(dtype=torch.float16)
 
 
 
 
1097
 
1098
  # for each context group, separately denoise the current timestep
1099
  for context_group in range(len(context_indexes[i])):
@@ -1123,6 +1127,11 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
1123
  # add the ending frames from noise_pred_uncond to the start of the noise_pred_uncond_sum
1124
  noise_pred_uncond_sum[:, :,current_context_indexes, :, :] += noise_pred_uncond
1125
  noise_pred_text_sum[:, :,current_context_indexes, :, :] += noise_pred_text
 
 
 
 
 
1126
  #increase the counter for the ending frames
1127
  latent_counter[current_context_indexes] += 1
1128
 
@@ -1137,6 +1146,17 @@ class AnimateDiffPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdap
1137
  latent_counter = latent_counter.reshape(1, 1, num_frames, 1, 1)
1138
  noise_pred_uncond = noise_pred_uncond_sum / latent_counter
1139
  noise_pred_text = noise_pred_text_sum / latent_counter
 
 
 
 
 
 
 
 
 
 
 
1140
  noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
1141
 
1142
  # print min and max
 
1094
  noise_pred_uncond_sum = torch.zeros_like(latents).to(device).to(dtype=torch.float16)
1095
  noise_pred_text_sum = torch.zeros_like(latents).to(device).to(dtype=torch.float16)
1096
  latent_counter = torch.zeros(num_frames).to(device).to(dtype=torch.float16)
1097
+ max_sum = 0
1098
+ min_sum = 0
1099
+ max_sum_cond = 0
1100
+ min_sum_cond = 0
1101
 
1102
  # for each context group, separately denoise the current timestep
1103
  for context_group in range(len(context_indexes[i])):
 
1127
  # add the ending frames from noise_pred_uncond to the start of the noise_pred_uncond_sum
1128
  noise_pred_uncond_sum[:, :,current_context_indexes, :, :] += noise_pred_uncond
1129
  noise_pred_text_sum[:, :,current_context_indexes, :, :] += noise_pred_text
1130
+ # track the average min and max for normalization
1131
+ max_sum += noise_pred_uncond.max()
1132
+ min_sum += noise_pred_uncond.min()
1133
+ max_sum_cond += noise_pred_text.max()
1134
+ min_sum_cond += noise_pred_text.min()
1135
  #increase the counter for the ending frames
1136
  latent_counter[current_context_indexes] += 1
1137
 
 
1146
  latent_counter = latent_counter.reshape(1, 1, num_frames, 1, 1)
1147
  noise_pred_uncond = noise_pred_uncond_sum / latent_counter
1148
  noise_pred_text = noise_pred_text_sum / latent_counter
1149
+
1150
+ # calculate the average min and max for normalization
1151
+ avg_max = max_sum / latent_counter.sum()
1152
+ avg_min = min_sum / latent_counter.sum()
1153
+ avg_max_cond = max_sum_cond / latent_counter.sum()
1154
+ avg_min_cond = min_sum_cond / latent_counter.sum()
1155
+
1156
+ # min-max normalize the noise predictions using the averaged min and max
1157
+ noise_pred_uncond = (noise_pred_uncond - avg_min) / (avg_max - avg_min)
1158
+ noise_pred_text = (noise_pred_text - avg_min_cond) / (avg_max_cond - avg_min_cond)
1159
+
1160
  noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
1161
 
1162
  # print min and max