linoyts HF Staff committed on
Commit
48fbb23
·
verified ·
1 Parent(s): 0093903

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -25
app.py CHANGED
@@ -38,6 +38,8 @@ def generate(prompt,
38
  negative_prompt,
39
  image,
40
  video,
 
 
41
  mode,
42
  steps,
43
  num_frames,
@@ -51,15 +53,15 @@ def generate(prompt,
51
 
52
  # Part 1. Generate video at smaller resolution
53
  # Text-only conditioning is also supported without the need to pass `conditions`
54
- expected_height, expected_width = 768, 1152 #todo make configurable
55
  downscale_factor = 2 / 3
56
  downscaled_height, downscaled_width = int(expected_height * downscale_factor), int(expected_width * downscale_factor)
57
  downscaled_height, downscaled_width = round_to_nearest_resolution_acceptable_by_vae(downscaled_height, downscaled_width)
58
 
59
- if mode == "text-to-video" and video is not None:
60
  video = load_video(video)[:frames_to_use]
61
  condition = True
62
- elif mode == "image-to-video" and image is not None:
63
  video = [image]
64
  condition = True
65
  else:
@@ -85,22 +87,6 @@ def generate(prompt,
85
  output_type="latent",
86
  ).frames
87
 
88
-
89
-
90
-
91
- # latents = pipe(
92
- # conditions=condition1,
93
- # prompt=prompt,
94
- # negative_prompt=negative_prompt,
95
- # # width=downscaled_width,
96
- # # height=downscaled_height,
97
- # num_frames=num_frames,
98
- # num_inference_steps=steps,
99
- # decode_timestep = 0.05,
100
- # decode_noise_scale = 0.025,
101
- # generator=torch.Generator().manual_seed(seed),
102
- # #output_type="latent",
103
- # ).frames
104
 
105
  # Part 2. Upscale generated video using latent upsampler with fewer inference steps
106
  # The available latent upsampler upscales the height/width by 2x
@@ -120,7 +106,7 @@ def generate(prompt,
120
  height=upscaled_height,
121
  num_frames=num_frames,
122
  guidance_scale=1.0,
123
- denoise_strength=0.6, # Effectively, 4 inference steps out of 10
124
  num_inference_steps=3,
125
  latents=upscaled_latents,
126
  decode_timestep=0.05,
@@ -168,15 +154,14 @@ with gr.Blocks(css=css, theme=gr.themes.Ocean()) as demo:
168
  with gr.Column():
169
  with gr.Group():
170
  with gr.Tab("text-to-video") as text_tab:
171
- image = gr.Image(label="", visible=False)
172
- #prompt = gr.Textbox(label="prompt")
173
  with gr.Tab("image-to-video") as image_tab:
174
- image = gr.Image(label="")
175
  with gr.Tab("video-to-video") as video_tab:
176
- video = gr.Video(label="")
177
  frames_to_use = gr.Number(label="num frames to use",info="first # of frames to use from the input video", value=1)
178
  prompt = gr.Textbox(label="prompt")
179
- improve_texture = gr.Checkbox(label="improve texture", value=False, info="note it slows generation")
180
  run_button = gr.Button()
181
  with gr.Column():
182
  output = gr.Video(interactive=False)
@@ -204,6 +189,8 @@ with gr.Blocks(css=css, theme=gr.themes.Ocean()) as demo:
204
  negative_prompt,
205
  image,
206
  video,
 
 
207
  mode,
208
  steps,
209
  num_frames,
 
38
  negative_prompt,
39
  image,
40
  video,
41
+ height,
42
+ width,
43
  mode,
44
  steps,
45
  num_frames,
 
53
 
54
  # Part 1. Generate video at smaller resolution
55
  # Text-only conditioning is also supported without the need to pass `conditions`
56
+ expected_height, expected_width = height, width
57
  downscale_factor = 2 / 3
58
  downscaled_height, downscaled_width = int(expected_height * downscale_factor), int(expected_width * downscale_factor)
59
  downscaled_height, downscaled_width = round_to_nearest_resolution_acceptable_by_vae(downscaled_height, downscaled_width)
60
 
61
+ if mode == "text-to-video" and (video is not None):
62
  video = load_video(video)[:frames_to_use]
63
  condition = True
64
+ elif mode == "image-to-video" and (image is not None):
65
  video = [image]
66
  condition = True
67
  else:
 
87
  output_type="latent",
88
  ).frames
89
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
 
91
  # Part 2. Upscale generated video using latent upsampler with fewer inference steps
92
  # The available latent upsampler upscales the height/width by 2x
 
106
  height=upscaled_height,
107
  num_frames=num_frames,
108
  guidance_scale=1.0,
109
+ denoise_strength=0.6, # Effectively, 0.6 * 3 inference steps
110
  num_inference_steps=3,
111
  latents=upscaled_latents,
112
  decode_timestep=0.05,
 
154
  with gr.Column():
155
  with gr.Group():
156
  with gr.Tab("text-to-video") as text_tab:
157
+ image_n = gr.Image(label="", visible=False)
 
158
  with gr.Tab("image-to-video") as image_tab:
159
+ image = gr.Image(label="input image")
160
  with gr.Tab("video-to-video") as video_tab:
161
+ video = gr.Video(label="input video")
162
  frames_to_use = gr.Number(label="num frames to use",info="first # of frames to use from the input video", value=1)
163
  prompt = gr.Textbox(label="prompt")
164
+ improve_texture = gr.Checkbox(label="improve texture", value=False, info="slows down generation")
165
  run_button = gr.Button()
166
  with gr.Column():
167
  output = gr.Video(interactive=False)
 
189
  negative_prompt,
190
  image,
191
  video,
192
+ height,
193
+ width,
194
  mode,
195
  steps,
196
  num_frames,