jbilcke-hf HF staff committed on
Commit
a36a3bb
1 Parent(s): e00ae5a

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +10 -9
handler.py CHANGED
@@ -23,7 +23,7 @@ logger = logging.getLogger(__name__)
23
 
24
  # Constraints
25
  MAX_LARGE_SIDE = 1280
26
- MAX_SMALL_SIDE = 720
27
  MAX_FRAMES = (8 * 21) + 1 # visual glitches appear after about 169 frames, so we cap it
28
 
29
  # this is only a temporary solution (famous last words)
@@ -78,15 +78,16 @@ class GenerationConfig:
78
  negative_prompt: str = "saturated, overlit, worst quality, inconsistent motion, blurry, jittery, distorted, cropped, watermarked, watermark, logo, subtitle, subtitles, lowres"
79
 
80
  # video model settings (will be used during generation of the initial raw video clip)
81
- width: int = 768 # max is 1280 but we use a lower value
82
- height: int = 416 # max is 720 but we use a lower value
 
83
 
84
  # users may tend to always set this to the max, to get as much usable content as possible (which is MAX_FRAMES, i.e. 169).
85
  # The value must be a multiple of 8, plus 1 frame.
86
  # visual glitches appear after about 169 frames, so we don't need more actually
87
  num_frames: int = (8 * 14) + 1
88
 
89
- guidance_scale: float = 4.0
90
  num_inference_steps: int = 30
91
 
92
  # reproducible generation settings
@@ -116,12 +117,12 @@ class GenerationConfig:
116
  total_pixels = self.width * self.height
117
  if total_pixels > MAX_TOTAL_PIXELS:
118
  scale = (MAX_TOTAL_PIXELS / total_pixels) ** 0.5
119
- self.width = max(128, min(MAX_LARGE_SIDE, round(self.width * scale / 16) * 16))
120
- self.height = max(128, min(MAX_LARGE_SIDE, round(self.height * scale / 16) * 16))
121
  else:
122
- # Round dimensions to nearest multiple of 16
123
- self.width = max(128, min(MAX_LARGE_SIDE, round(self.width / 16) * 16))
124
- self.height = max(128, min(MAX_LARGE_SIDE, round(self.height / 16) * 16))
125
 
126
  # Adjust number of frames to be in format 8k + 1
127
  k = (self.num_frames - 1) // 8
 
23
 
24
  # Constraints
25
  MAX_LARGE_SIDE = 1280
26
+ MAX_SMALL_SIDE = 768 # should be 720 but it must be divisible by 32
27
  MAX_FRAMES = (8 * 21) + 1 # visual glitches appear after about 169 frames, so we cap it
28
 
29
  # this is only a temporary solution (famous last words)
 
78
  negative_prompt: str = "saturated, overlit, worst quality, inconsistent motion, blurry, jittery, distorted, cropped, watermarked, watermark, logo, subtitle, subtitles, lowres"
79
 
80
  # video model settings (will be used during generation of the initial raw video clip)
81
+ # we use small values to make things a bit faster
82
+ width: int = 768
83
+ height: int = 416
84
 
85
  # users may tend to always set this to the max, to get as much usable content as possible (which is MAX_FRAMES, i.e. 169).
86
  # The value must be a multiple of 8, plus 1 frame.
87
  # visual glitches appear after about 169 frames, so we don't need more actually
88
  num_frames: int = (8 * 14) + 1
89
 
90
+ guidance_scale: float = 5.0
91
  num_inference_steps: int = 30
92
 
93
  # reproducible generation settings
 
117
  total_pixels = self.width * self.height
118
  if total_pixels > MAX_TOTAL_PIXELS:
119
  scale = (MAX_TOTAL_PIXELS / total_pixels) ** 0.5
120
+ self.width = max(128, min(MAX_LARGE_SIDE, round(self.width * scale / 32) * 32))
121
+ self.height = max(128, min(MAX_LARGE_SIDE, round(self.height * scale / 32) * 32))
122
  else:
123
+ # Round dimensions to nearest multiple of 32
124
+ self.width = max(128, min(MAX_LARGE_SIDE, round(self.width / 32) * 32))
125
+ self.height = max(128, min(MAX_LARGE_SIDE, round(self.height / 32) * 32))
126
 
127
  # Adjust number of frames to be in format 8k + 1
128
  k = (self.num_frames - 1) // 8