Bhargavssss committed
Commit 085f4ee · verified · 1 Parent(s): e13087b

Update app.py

Files changed (1)
app.py +73 -94
app.py CHANGED
@@ -2,55 +2,31 @@ import os
 import uuid
 from omegaconf import OmegaConf
 import spaces
-
 import random
-
 import imageio
 import torch
 import torchvision
 import gradio as gr
 import numpy as np
-
+from fastapi import FastAPI
+from fastapi.responses import FileResponse
 from gradio.components import Textbox, Video
 from huggingface_hub import hf_hub_download
-
 from utils.common_utils import load_model_checkpoint
 from utils.utils import instantiate_from_config
 from scheduler.t2v_turbo_scheduler import T2VTurboScheduler
 from pipeline.t2v_turbo_vc2_pipeline import T2VTurboVC2Pipeline
 
-DESCRIPTION = """# T2V-Turbo 🚀
-
-Our model is distilled from [VideoCrafter2](https://ailab-cvc.github.io/videocrafter2/).
-
-T2V-Turbo learns a LoRA on top of the base model by aligning to the reward feedback from [HPSv2.1](https://github.com/tgxs002/HPSv2/tree/master) and [InternVid2 Stage 2 Model](https://huggingface.co/OpenGVLab/InternVideo2-Stage2_1B-224p-f4).
-
-T2V-Turbo-v2 optimizes the training techniques by finetuning the full base model and further aligns to [CLIPScore](https://huggingface.co/laion/CLIP-ViT-H-14-laion2B-s32B-b79K)
-
-T2V-Turbo trains on pure WebVid-10M data, whereas T2V-Turbo-v2 carufully optimizes different learning objectives with a mixutre of VidGen-1M and WebVid-10M data.
-
-Moreover, T2V-Turbo-v2 supports to distill motion priors from the training videos.
-
-[Project page for T2V-Turbo](https://t2v-turbo.github.io) 🥳
-
-[Project page for T2V-Turbo-v2](https://t2v-turbo-v2.github.io) 🤓
-"""
-if torch.cuda.is_available():
-    DESCRIPTION += "\n<p>Running on CUDA 😀</p>"
-elif hasattr(torch, "xpu") and torch.xpu.is_available():
-    DESCRIPTION += "\n<p>Running on XPU 🤓</p>"
-else:
-    DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
+# Keep all your original constants and DESCRIPTION
 
 MAX_SEED = np.iinfo(np.int32).max
-
+app = FastAPI()
 
 def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
     return seed
 
-
 def save_video(video_array, video_save_path, fps: int = 16):
     video = video_array.detach().cpu()
     video = torch.clamp(video.float(), -1.0, 1.0)
@@ -62,17 +38,7 @@ def save_video(video_array, video_save_path, fps: int = 16):
         video_save_path, video, fps=fps, video_codec="h264", options={"crf": "10"}
     )
 
-example_txt = [
-    "An astronaut riding a horse.",
-    "Darth vader surfing in waves.",
-    "light wind, feathers moving, she moves her gaze, 4k",
-    "a girl floating underwater.",
-    "Pikachu snowboarding.",
-    "Self-portrait oil painting, a beautiful cyborg with golden hair, 8k",
-    "A musician strums his guitar, serenading the moonlit night.",
-]
-
-examples = [[i, 7.5, 0.5, 16, 16, 0, True, "bf16"] for i in example_txt]
+# Keep your original example_txt and examples
 
 @spaces.GPU(duration=120)
 @torch.inference_mode()
@@ -87,8 +53,8 @@ def generate(
     param_dtype="bf16",
     motion_gs: float = 0.05,
     fps: int = 8,
+    is_api: bool = False,  # New parameter to handle API calls
 ):
-
     seed = randomize_seed_fn(seed, randomize_seed)
     torch.manual_seed(seed)
 
@@ -123,26 +89,70 @@ def generate(
     )
 
     torch.cuda.empty_cache()
-    tmp_save_path = "tmp.mp4"
+
+    # Generate unique filename for API calls
+    if is_api:
+        video_filename = f"{uuid.uuid4()}.mp4"
+    else:
+        video_filename = "tmp.mp4"
+
     root_path = "./videos/"
     os.makedirs(root_path, exist_ok=True)
-    video_save_path = os.path.join(root_path, tmp_save_path)
+    video_save_path = os.path.join(root_path, video_filename)
 
     save_video(result[0], video_save_path, fps=fps)
     display_model_info = f"Video size: {num_frames}x320x512, Sampling Step: {num_inference_steps}, Guidance Scale: {guidance_scale}"
+
+    if is_api:
+        return {
+            "video_path": video_save_path,
+            "prompt": prompt,
+            "model_info": display_model_info,
+            "seed": seed
+        }
     return video_save_path, prompt, display_model_info, seed
 
-
-block_css = """
-#buttons button {
-    min-width: min(120px,100%);
-}
-"""
-
+# API endpoint
+@app.post("/generate")
+async def generate_api(
+    prompt: str,
+    guidance_scale: float = 7.5,
+    percentage: float = 0.5,
+    num_inference_steps: int = 4,
+    num_frames: int = 16,
+    seed: int = 0,
+    randomize_seed: bool = False,
+    param_dtype: str = "bf16",
+    motion_gs: float = 0.05,
+    fps: int = 8,
+):
+    result = generate(
+        prompt=prompt,
+        guidance_scale=guidance_scale,
+        percentage=percentage,
+        num_inference_steps=num_inference_steps,
+        num_frames=num_frames,
+        seed=seed,
+        randomize_seed=randomize_seed,
+        param_dtype=param_dtype,
+        motion_gs=motion_gs,
+        fps=fps,
+        is_api=True
+    )
+
+    return FileResponse(
+        result["video_path"],
+        media_type="video/mp4",
+        headers={
+            "X-Model-Info": result["model_info"],
+            "X-Seed": str(result["seed"])
+        }
+    )
 
 if __name__ == "__main__":
     device = torch.device("cuda:0")
 
+    # Keep all your original model initialization code
     config = OmegaConf.load("configs/inference_t2v_512_v2.0.yaml")
     model_config = config.pop("model", OmegaConf.create())
     pretrained_t2v = instantiate_from_config(model_config)
@@ -169,54 +179,18 @@ if __name__ == "__main__":
     pipeline = T2VTurboVC2Pipeline(pretrained_t2v, scheduler, model_config)
     pipeline.to(device)
 
+    # Mount both Gradio and FastAPI
     demo = gr.Interface(
-        fn=generate,
+        fn=lambda *args: generate(*args, is_api=False),
         inputs=[
             Textbox(label="", placeholder="Please enter your prompt. \n"),
-            gr.Slider(
-                label="Guidance scale",
-                minimum=2,
-                maximum=14,
-                step=0.1,
-                value=7.5,
-            ),
-            gr.Slider(
-                label="Percentage of steps to apply motion guidance (v2 w/ MG only)",
-                minimum=0.0,
-                maximum=0.5,
-                step=0.05,
-                value=0.5,
-            ),
-            gr.Slider(
-                label="Number of inference steps",
-                minimum=4,
-                maximum=50,
-                step=1,
-                value=16,
-            ),
-            gr.Slider(
-                label="Number of Video Frames",
-                minimum=16,
-                maximum=48,
-                step=8,
-                value=16,
-            ),
-            gr.Slider(
-                label="Seed",
-                minimum=0,
-                maximum=MAX_SEED,
-                step=1,
-                value=0,
-                randomize=True,
-            ),
+            gr.Slider(label="Guidance scale", minimum=2, maximum=14, step=0.1, value=7.5),
+            gr.Slider(label="Percentage of steps to apply motion guidance", minimum=0.0, maximum=0.5, step=0.05, value=0.5),
+            gr.Slider(label="Number of inference steps", minimum=4, maximum=50, step=1, value=16),
+            gr.Slider(label="Number of Video Frames", minimum=16, maximum=48, step=8, value=16),
+            gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0, randomize=True),
             gr.Checkbox(label="Randomize seed", value=True),
-            gr.Radio(
-                ["bf16", "fp16", "fp32"],
-                label="torch.dtype",
-                value="bf16",
-                interactive=True,
-                info="Dtype for inference. Default is bf16.",
-            )
+            gr.Radio(["bf16", "fp16", "fp32"], label="torch.dtype", value="bf16", interactive=True),
         ],
         outputs=[
            gr.Video(label="Generated Video", width=512, height=320, interactive=False, autoplay=True),
@@ -231,4 +205,9 @@
         cache_examples=False,
         concurrency_limit=10,
     )
-    demo.launch()
+
+    app = gr.mount_gradio_app(app, demo, path="/")
+
+    # Run both servers
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=7860)
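
The practical effect of this commit is that the Space now answers plain HTTP requests in addition to serving the Gradio UI: POST /generate declares only scalar arguments, so FastAPI reads them from the query string, and the finished clip comes back as a FileResponse with the model info and seed echoed in custom headers. Below is a minimal client sketch, assuming the requests package is installed and the app is reachable at http://localhost:7860 (the address uvicorn.run binds above); the base URL, prompt, and output filename are illustrative placeholders, not part of the commit.

import requests

BASE_URL = "http://localhost:7860"  # assumption: where uvicorn.run() serves the mounted app

# The endpoint's scalar arguments arrive as query parameters; only "prompt" is required.
params = {
    "prompt": "An astronaut riding a horse.",
    "num_inference_steps": 16,
    "num_frames": 16,
    "randomize_seed": True,
}

resp = requests.post(f"{BASE_URL}/generate", params=params, timeout=600)
resp.raise_for_status()

# The response body is the mp4 itself; metadata travels in the custom headers.
with open("output.mp4", "wb") as f:  # hypothetical output filename
    f.write(resp.content)

print("Model info:", resp.headers.get("X-Model-Info"))
print("Seed used:", resp.headers.get("X-Seed"))

Because API calls take the is_api=True path, each request is written to a unique uuid4-named file under ./videos/ rather than the shared tmp.mp4 used by the UI, so concurrent requests no longer overwrite each other's output, and the X-Seed header reports the seed actually used when randomize_seed is enabled.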