VEO3-RealTime

Running on Zero

App Files Community

seawolf2357 committed on Jun 19

Commit

c2c95c1

verified ·

1 Parent(s): a34249d

Update app.py

Browse files

Files changed (1) hide show

app.py +17 -15

app.py CHANGED Viewed

@@ -110,7 +110,7 @@ parser.add_argument("--checkpoint_path", type=str, default='./checkpoints/self_f
 parser.add_argument("--config_path", type=str, default='./configs/self_forcing_dmd.yaml', help="Path to the model config.")
 parser.add_argument('--share', action='store_true', help="Create a public Gradio link.")
 parser.add_argument('--trt', action='store_true', help="Use TensorRT optimized VAE decoder.")
-parser.add_argument('--fps', type=float, default=15.0, help="Playback FPS for frame streaming.")
 args = parser.parse_args()
 gpu = "cuda"
@@ -257,7 +257,7 @@ pipeline.to(dtype=torch.float16).to(gpu)
 @torch.no_grad()
 @spaces.GPU
-def video_generation_handler_streaming(prompt, seed=42, fps=15):
     """
     Generator function that yields .ts video chunks using PyAV for streaming.
     Now optimized for block-based processing.
@@ -277,14 +277,14 @@ def video_generation_handler_streaming(prompt, seed=42, fps=15):
     pipeline._initialize_kv_cache(1, torch.float16, device=gpu)
     pipeline._initialize_crossattn_cache(1, torch.float16, device=gpu)
-    # 5.5초 영상을 위해 노이즈 텐서 크기 증가 (21 -> 24)
-    noise = torch.randn([1, 24, 16, 60, 104], device=gpu, dtype=torch.float16, generator=rnd)
     vae_cache, latents_cache = None, None
     if not APP_STATE["current_use_taehv"] and not args.trt:
         vae_cache = [c.to(device=gpu, dtype=torch.float16) for c in ZERO_VAE_CACHE]
-    num_blocks = 8  # 7 -> 8로 증가하여 약 5.5초 영상 생성
     current_start_frame = 0
     all_num_frames = [pipeline.num_frame_per_block] * num_blocks
@@ -369,7 +369,7 @@ def video_generation_handler_streaming(prompt, seed=42, fps=15):
             frame_status_html = (
                 f"<div style='padding: 10px; border: 1px solid #ddd; border-radius: 8px; font-family: sans-serif;'>"
-                f"  <p style='margin: 0 0 8px 0; font-size: 16px; font-weight: bold;'>Generating Video...</p>"
                 f"  <div style='background: #e9ecef; border-radius: 4px; width: 100%; overflow: hidden;'>"
                 f"    <div style='width: {total_progress:.1f}%; height: 20px; background-color: #0d6efd; transition: width 0.2s;'></div>"
                 f"  </div>"
@@ -407,7 +407,7 @@ def video_generation_handler_streaming(prompt, seed=42, fps=15):
         current_start_frame += current_num_frames
         # 메모리 효율성을 위한 GPU 캐시 정리
-        if idx < num_blocks - 1 and idx % 2 == 1:  # 2블록마다 캐시 정리
             torch.cuda.empty_cache()
     # Final completion status
@@ -456,7 +456,7 @@ def video_generation_handler_streaming(prompt, seed=42, fps=15):
             f"      📊 Generated {total_frames_yielded} frames across {num_blocks} blocks ({video_duration:.1f} seconds)"
             f"    </p>"
             f"    <p style='margin: 0; color: #0f5132; font-size: 14px;'>"
-            f"      🎬 Resolution: {all_frames_for_download[0].shape[1]}x{all_frames_for_download[0].shape[0]} • FPS: {fps} • Size: {file_size_mb:.1f} MB"
             f"    </p>"
             f"    <p style='margin: 8px 0 0 0; color: #0f5132; font-size: 13px; font-style: italic;'>"
             f"      💾 Click the download button below to save your video!"
@@ -479,8 +479,8 @@ def video_generation_handler_streaming(prompt, seed=42, fps=15):
 # --- Gradio UI Layout ---
 with gr.Blocks(title="Self-Forcing Streaming Demo") as demo:
-    gr.Markdown("# 🚀 Self-Forcing Video Generation (6-second)")
-    gr.Markdown("Real-time 6-second video generation with distilled Wan2-1 1.3B [[Model]](https://huggingface.co/gdhe17/Self-Forcing), [[Project page]](https://self-forcing.github.io), [[Paper]](https://huggingface.co/papers/2506.08009)")
     with gr.Row():
         with gr.Column(scale=2):
@@ -506,6 +506,7 @@ with gr.Blocks(title="Self-Forcing Streaming Demo") as demo:
             )
             gr.Markdown("### ⚙️ Settings")
             with gr.Row():
                 seed = gr.Number(
                     label="Seed",
@@ -515,12 +516,12 @@ with gr.Blocks(title="Self-Forcing Streaming Demo") as demo:
                 )
                 fps = gr.Slider(
                     label="Playback FPS",
-                    minimum=1,
                     maximum=30,
                     value=args.fps,
                     step=1,
-                    visible=False,
-                    info="Frames per second for playback"
                 )
         with gr.Column(scale=3):
@@ -548,8 +549,9 @@ with gr.Blocks(title="Self-Forcing Streaming Demo") as demo:
             # 다운로드용 파일 출력
             download_file = gr.File(
-                label="📥 Download Video",
-                visible=False
             )
     # Connect the generator to the streaming video

 parser.add_argument("--config_path", type=str, default='./configs/self_forcing_dmd.yaml', help="Path to the model config.")
 parser.add_argument('--share', action='store_true', help="Create a public Gradio link.")
 parser.add_argument('--trt', action='store_true', help="Use TensorRT optimized VAE decoder.")
+parser.add_argument('--fps', type=float, default=12.0, help="Playback FPS for frame streaming.")
 args = parser.parse_args()
 gpu = "cuda"
 @torch.no_grad()
 @spaces.GPU
+def video_generation_handler_streaming(prompt, seed=42, fps=12):
     """
     Generator function that yields .ts video chunks using PyAV for streaming.
     Now optimized for block-based processing.
     pipeline._initialize_kv_cache(1, torch.float16, device=gpu)
     pipeline._initialize_crossattn_cache(1, torch.float16, device=gpu)
+    # 노이즈 텐서 크기
+    noise = torch.randn([1, 21, 16, 60, 104], device=gpu, dtype=torch.float16, generator=rnd)
     vae_cache, latents_cache = None, None
     if not APP_STATE["current_use_taehv"] and not args.trt:
         vae_cache = [c.to(device=gpu, dtype=torch.float16) for c in ZERO_VAE_CACHE]
+    num_blocks = 7  # 원래 설정으로 복원
     current_start_frame = 0
     all_num_frames = [pipeline.num_frame_per_block] * num_blocks
             frame_status_html = (
                 f"<div style='padding: 10px; border: 1px solid #ddd; border-radius: 8px; font-family: sans-serif;'>"
+                f"  <p style='margin: 0 0 8px 0; font-size: 16px; font-weight: bold;'>🎬 Generating Video...</p>"
                 f"  <div style='background: #e9ecef; border-radius: 4px; width: 100%; overflow: hidden;'>"
                 f"    <div style='width: {total_progress:.1f}%; height: 20px; background-color: #0d6efd; transition: width 0.2s;'></div>"
                 f"  </div>"
         current_start_frame += current_num_frames
         # 메모리 효율성을 위한 GPU 캐시 정리
+        if idx < num_blocks - 1 and idx % 3 == 2:  # 3블록마다 캐시 정리
             torch.cuda.empty_cache()
     # Final completion status
             f"      📊 Generated {total_frames_yielded} frames across {num_blocks} blocks ({video_duration:.1f} seconds)"
             f"    </p>"
             f"    <p style='margin: 0; color: #0f5132; font-size: 14px;'>"
+            f"      🎬 Resolution: {all_frames_for_download[0].shape[1]}x{all_frames_for_download[0].shape[0]} • FPS: {fps} • Duration: {video_duration:.1f}s • Size: {file_size_mb:.1f} MB"
             f"    </p>"
             f"    <p style='margin: 8px 0 0 0; color: #0f5132; font-size: 13px; font-style: italic;'>"
             f"      💾 Click the download button below to save your video!"
 # --- Gradio UI Layout ---
 with gr.Blocks(title="Self-Forcing Streaming Demo") as demo:
+    gr.Markdown("# 🚀 Self-Forcing Video Generation")
+    gr.Markdown("Real-time video generation with distilled Wan2-1 1.3B | 5-6 seconds duration [[Model]](https://huggingface.co/gdhe17/Self-Forcing), [[Project page]](https://self-forcing.github.io), [[Paper]](https://huggingface.co/papers/2506.08009)")
     with gr.Row():
         with gr.Column(scale=2):
             )
             gr.Markdown("### ⚙️ Settings")
+            gr.Markdown("💡 **Tip**: Adjust FPS to control video duration (8 FPS → ~10s, 10 FPS → ~8s, 12 FPS → ~6.8s, 15 FPS → ~5.4s)")
             with gr.Row():
                 seed = gr.Number(
                     label="Seed",
                 )
                 fps = gr.Slider(
                     label="Playback FPS",
+                    minimum=8,
                     maximum=30,
                     value=args.fps,
                     step=1,
+                    visible=True,
+                    info="Lower FPS = longer video duration"
                 )
         with gr.Column(scale=3):
             # 다운로드용 파일 출력
             download_file = gr.File(
+                label="📥 Download Generated Video",
+                visible=False,
+                elem_id="download_file"
             )
     # Connect the generator to the streaming video