jbilcke-hf HF Staff committed on
Commit
54eccd3
·
verified ·
1 Parent(s): b55bb25

Create app_broken_lora.py

Files changed (1)
  1. app_broken_lora.py +654 -0
app_broken_lora.py ADDED
@@ -0,0 +1,654 @@
+ import subprocess
+ # Not sure why this works in the original Space but fails with "pip not found" in mine.
+ #subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
+
+ import os
+ from huggingface_hub import snapshot_download, hf_hub_download
+
+ # Configuration for data paths
+ DATA_ROOT = os.path.normpath(os.getenv('DATA_ROOT', '.'))
+ WAN_MODELS_PATH = os.path.join(DATA_ROOT, 'wan_models')
+ OTHER_MODELS_PATH = os.path.join(DATA_ROOT, 'other_models')
+
+ snapshot_download(
+     repo_id="Wan-AI/Wan2.1-T2V-1.3B",
+     local_dir=os.path.join(WAN_MODELS_PATH, "Wan2.1-T2V-1.3B"),
+     local_dir_use_symlinks=False,
+     resume_download=True,
+     repo_type="model"
+ )
+
+ hf_hub_download(
+     repo_id="gdhe17/Self-Forcing",
+     filename="checkpoints/self_forcing_dmd.pt",
+     local_dir=OTHER_MODELS_PATH,
+     local_dir_use_symlinks=False
+ )
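+ # Note: local_dir_use_symlinks and resume_download appear to be deprecated no-ops in recent
+ # huggingface_hub releases; they are kept above for compatibility with older versions.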
+ import re
+ import random
+ import argparse
+ import hashlib
+ import urllib.request
+ import time
+ from PIL import Image
+ import torch
+ import gradio as gr
+ from omegaconf import OmegaConf
+ from tqdm import tqdm
+ import imageio
+ import av
+ import uuid
+ import tempfile
+ import shutil
+ from pathlib import Path
+ from typing import Dict, Any, List, Optional, Tuple, Union
+
+ from pipeline import CausalInferencePipeline
+ from demo_utils.constant import ZERO_VAE_CACHE
+ from demo_utils.vae_block3 import VAEDecoderWrapper
+ from utils.wan_wrapper import WanDiffusionWrapper, WanTextEncoder
+
+ from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM #, BitsAndBytesConfig
+ import numpy as np
+
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+
+ # LoRA Storage Configuration
+ STORAGE_PATH = Path(DATA_ROOT) / "storage"
+ LORA_PATH = STORAGE_PATH / "loras"
+ OUTPUT_PATH = STORAGE_PATH / "output"
+
+ # Create necessary directories
+ STORAGE_PATH.mkdir(parents=True, exist_ok=True)
+ LORA_PATH.mkdir(parents=True, exist_ok=True)
+ OUTPUT_PATH.mkdir(parents=True, exist_ok=True)
+
+ # Global variables for LoRA management
+ current_lora_id = None
+ current_lora_path = None
+
+ # --- Argument Parsing ---
+ parser = argparse.ArgumentParser(description="Gradio Demo for Self-Forcing with Frame Streaming")
+ parser.add_argument('--port', type=int, default=7860, help="Port to run the Gradio app on.")
+ parser.add_argument('--host', type=str, default='0.0.0.0', help="Host to bind the Gradio app to.")
+ parser.add_argument("--checkpoint_path", type=str, default=os.path.join(OTHER_MODELS_PATH, 'checkpoints', 'self_forcing_dmd.pt'), help="Path to the model checkpoint.")
+ parser.add_argument("--config_path", type=str, default='./configs/self_forcing_dmd.yaml', help="Path to the model config.")
+ parser.add_argument('--share', action='store_true', help="Create a public Gradio link.")
+ parser.add_argument('--trt', action='store_true', help="Use TensorRT optimized VAE decoder.")
+ parser.add_argument('--fps', type=float, default=15.0, help="Playback FPS for frame streaming.")
+ args = parser.parse_args()
+
+ gpu = "cuda"
+
+ try:
+     config = OmegaConf.load(args.config_path)
+     default_config = OmegaConf.load("configs/default_config.yaml")
+     config = OmegaConf.merge(default_config, config)
+ except FileNotFoundError as e:
+     print(f"Error loading config file: {e}\nPlease ensure the config files are in the correct path.")
+     exit(1)
+
+ # Initialize Models
+ print("Initializing models...")
+ text_encoder = WanTextEncoder()
+ transformer = WanDiffusionWrapper(is_causal=True)
+
+ try:
+     state_dict = torch.load(args.checkpoint_path, map_location="cpu")
+     transformer.load_state_dict(state_dict.get('generator_ema', state_dict.get('generator')))
+ except FileNotFoundError as e:
+     print(f"Error loading checkpoint: {e}\nPlease ensure the checkpoint '{args.checkpoint_path}' exists.")
+     exit(1)
+
+ text_encoder.eval().to(dtype=torch.float16).requires_grad_(False)
+ transformer.eval().to(dtype=torch.float16).requires_grad_(False)
+
+ text_encoder.to(gpu)
+ transformer.to(gpu)
+
+ APP_STATE = {
+     "torch_compile_applied": False,
+     "fp8_applied": False,
+     "current_use_taehv": False,
+     "current_vae_decoder": None,
+ }
+
+ # I've tried enabling it, but I didn't notice a significant performance improvement.
+ ENABLE_TORCH_COMPILATION = False
+
+ # LOW_MEMORY is used by the TAEHV decode path in initialize_vae_decoder() below but was never
+ # defined in this file; default it to False here so that path does not raise a NameError.
+ LOW_MEMORY = False
+
+ # "default": The default mode, used when no mode parameter is specified. It provides a good balance between performance and overhead.
+ # "reduce-overhead": Minimizes Python-related overhead using CUDA graphs. However, it may increase memory usage.
+ # "max-autotune": Uses Triton or template-based matrix multiplications on supported devices. It takes longer to compile but optimizes for the fastest possible execution. On GPUs it enables CUDA graphs by default.
+ # "max-autotune-no-cudagraphs": Similar to "max-autotune", but without CUDA graphs.
+ TORCH_COMPILATION_MODE = "default"
+
+ # Apply torch.compile for maximum performance
+ if not APP_STATE["torch_compile_applied"] and ENABLE_TORCH_COMPILATION:
+     print("🚀 Applying torch.compile for speed optimization...")
+     transformer.compile(mode=TORCH_COMPILATION_MODE)
+     APP_STATE["torch_compile_applied"] = True
+     print("✅ torch.compile applied to transformer")
+
+ def upload_lora_file(file: tempfile._TemporaryFileWrapper) -> Tuple[str, str]:
+     """Upload a LoRA file and return a hash-based ID for future reference"""
+     if file is None:
+         return "", ""
+
+     try:
+         # Calculate SHA256 hash of the file
+         sha256_hash = hashlib.sha256()
+         with open(file.name, "rb") as f:
+             for chunk in iter(lambda: f.read(4096), b""):
+                 sha256_hash.update(chunk)
+         file_hash = sha256_hash.hexdigest()
+
+         # Create destination path using hash
+         dest_path = LORA_PATH / f"{file_hash}.safetensors"
+
+         # Check if file already exists
+         if dest_path.exists():
+             print("LoRA file already exists!")
+             return file_hash, file_hash
+
+         # Copy the file to the destination
+         shutil.copy(file.name, dest_path)
+
+         print("LoRA file uploaded!")
+         return file_hash, file_hash
+     except Exception as e:
+         print(f"Error uploading LoRA file: {e}")
+         raise gr.Error(f"Failed to upload LoRA file: {str(e)}")
+
+ def get_lora_file_path(lora_id: Optional[str]) -> Optional[Path]:
+     """Get the path to a LoRA file from its hash-based ID"""
+     if not lora_id:
+         return None
+
+     # Check if file exists
+     lora_path = LORA_PATH / f"{lora_id}.safetensors"
+     if lora_path.exists():
+         return lora_path
+
+     return None
+
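+ # Note: this helper only records which LoRA file is currently selected (ID and path); it never
+ # actually attaches the weights to the transformer (there is no load/merge call anywhere below),
+ # which appears to be why this file is named app_broken_lora.py.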
+ def manage_lora_weights(lora_id: Optional[str], lora_weight: float) -> Tuple[bool, Optional[Path]]:
+     """Manage LoRA weights for the transformer model"""
+     global current_lora_id, current_lora_path
+
+     # Determine if we should use LoRA
+     using_lora = lora_id is not None and lora_id.strip() != "" and lora_weight > 0
+
+     # If not using LoRA but we have one loaded, clear it
+     if not using_lora and current_lora_id is not None:
+         print("Clearing current LoRA")
+         current_lora_id = None
+         current_lora_path = None
+         return False, None
+
+     # If using LoRA, check if we need to change weights
+     if using_lora:
+         lora_path = get_lora_file_path(lora_id)
+
+         if not lora_path:
+             print(f"No LoRA file with ID '{lora_id}' was found. Using base model instead.")
+
+             # If we had a LoRA loaded, clear it
+             if current_lora_id is not None:
+                 print("Clearing current LoRA")
+                 current_lora_id = None
+                 current_lora_path = None
+
+             return False, None
+
+         # If LoRA ID changed, update
+         if lora_id != current_lora_id:
+             print(f"Loading LoRA {lora_id}...")
+             current_lora_id = lora_id
+             current_lora_path = lora_path
+         else:
+             print("Using the already-loaded LoRA!")
+
+         return True, lora_path
+
+     return False, None
+
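+ # Each block of frames is encoded as a self-contained MPEG-TS segment, so the streaming
+ # gr.Video component can play chunks back-to-back as they arrive.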
+ def frames_to_ts_file(frames, filepath, fps=15):
+     """
+     Convert frames directly to .ts file using PyAV.
+
+     Args:
+         frames: List of numpy arrays (HWC, RGB, uint8)
+         filepath: Output file path
+         fps: Frames per second
+
+     Returns:
+         The filepath of the created file
+     """
+     if not frames:
+         return filepath
+
+     height, width = frames[0].shape[:2]
+
+     # Create container for MPEG-TS format
+     container = av.open(filepath, mode='w', format='mpegts')
+
+     # Add video stream with optimized settings for streaming
+     stream = container.add_stream('h264', rate=fps)
+     stream.width = width
+     stream.height = height
+     stream.pix_fmt = 'yuv420p'
+
+     # Optimize for low latency streaming
+     stream.options = {
+         'preset': 'ultrafast',
+         'tune': 'zerolatency',
+         'crf': '23',
+         'profile': 'baseline',
+         'level': '3.0'
+     }
+
+     try:
+         for frame_np in frames:
+             frame = av.VideoFrame.from_ndarray(frame_np, format='rgb24')
+             frame = frame.reformat(format=stream.pix_fmt)
+             for packet in stream.encode(frame):
+                 container.mux(packet)
+
+         for packet in stream.encode():
+             container.mux(packet)
+
+     finally:
+         container.close()
+
+     return filepath
+
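+ # Three decoder back-ends are supported: the TensorRT wrapper (--trt), the tiny TAEHV
+ # autoencoder (faster but lower fidelity), and the default causal Wan VAE decoder, which is
+ # decoded block by block through the vae_cache initialised from ZERO_VAE_CACHE.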
+ def initialize_vae_decoder(use_taehv=False, use_trt=False):
+     if use_trt:
+         from demo_utils.vae import VAETRTWrapper
+         print("Initializing TensorRT VAE Decoder...")
+         vae_decoder = VAETRTWrapper()
+         APP_STATE["current_use_taehv"] = False
+     elif use_taehv:
+         print("Initializing TAEHV VAE Decoder...")
+         from demo_utils.taehv import TAEHV
+         taehv_checkpoint_path = "checkpoints/taew2_1.pth"
+         if not os.path.exists(taehv_checkpoint_path):
+             print(f"Downloading TAEHV checkpoint to {taehv_checkpoint_path}...")
+             os.makedirs("checkpoints", exist_ok=True)
+             download_url = "https://github.com/madebyollin/taehv/raw/main/taew2_1.pth"
+             try:
+                 urllib.request.urlretrieve(download_url, taehv_checkpoint_path)
+             except Exception as e:
+                 raise RuntimeError(f"Failed to download taew2_1.pth: {e}")
+
+         class DotDict(dict): __getattr__ = dict.get
+
+         class TAEHVDiffusersWrapper(torch.nn.Module):
+             def __init__(self):
+                 super().__init__()
+                 self.dtype = torch.float16
+                 self.taehv = TAEHV(checkpoint_path=taehv_checkpoint_path).to(self.dtype)
+                 self.config = DotDict(scaling_factor=1.0)
+             def decode(self, latents, return_dict=None):
+                 return self.taehv.decode_video(latents, parallel=not LOW_MEMORY).mul_(2).sub_(1)
+
+         vae_decoder = TAEHVDiffusersWrapper()
+         APP_STATE["current_use_taehv"] = True
+     else:
+         print("Initializing Default VAE Decoder...")
+         vae_decoder = VAEDecoderWrapper()
+         try:
+             vae_state_dict = torch.load(os.path.join(WAN_MODELS_PATH, 'Wan2.1-T2V-1.3B', 'Wan2.1_VAE.pth'), map_location="cpu")
+             decoder_state_dict = {k: v for k, v in vae_state_dict.items() if 'decoder.' in k or 'conv2' in k}
+             vae_decoder.load_state_dict(decoder_state_dict)
+         except FileNotFoundError:
+             print("Warning: Default VAE weights not found.")
+         APP_STATE["current_use_taehv"] = False
+
+     vae_decoder.eval().to(dtype=torch.float16).requires_grad_(False).to(gpu)
+
+     # Apply torch.compile to VAE decoder if enabled (following demo.py pattern)
+     if APP_STATE["torch_compile_applied"] and not use_taehv and not use_trt:
+         print("🚀 Applying torch.compile to VAE decoder...")
+         vae_decoder.compile(mode=TORCH_COMPILATION_MODE)
+         print("✅ torch.compile applied to VAE decoder")
+
+     APP_STATE["current_vae_decoder"] = vae_decoder
+     print(f"✅ VAE decoder initialized: {'TAEHV' if use_taehv else 'Default VAE'}")
+
+ # Initialize with default VAE
+ initialize_vae_decoder(use_taehv=False, use_trt=args.trt)
+
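+ # Note: this assignment shadows the (unused) `pipeline` helper imported from transformers above.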
+ pipeline = CausalInferencePipeline(
+     config, device=gpu, generator=transformer, text_encoder=text_encoder,
+     vae=APP_STATE["current_vae_decoder"]
+ )
+
+ pipeline.to(dtype=torch.float16).to(gpu)
+
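+ # Note: the width/height sliders are accepted below but not yet wired up; the latent noise shape
+ # is hardcoded to 60x104 (roughly 832x480 pixels after VAE upsampling), so the output resolution
+ # does not follow the UI values.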
+ @torch.no_grad()
+ def video_generation_handler_streaming(prompt, seed=42, fps=15, width=400, height=224, duration=5, lora_id=None, lora_weight=0.0):
+     """
+     Generator function that yields .ts video chunks using PyAV for streaming.
+     """
+     if seed == -1:
+         seed = random.randint(0, 2**32 - 1)
+
+     # print(f"🎬 Starting PyAV streaming: seed: {seed}, duration: {duration}s")
+
+     # Handle LoRA weights
+     using_lora, lora_path = manage_lora_weights(lora_id, lora_weight)
+     if using_lora:
+         print(f"🎨 Using LoRA with weight factor {lora_weight}")
+     else:
+         print("🎨 Using base model (no LoRA)")
+
+     # Setup
+     conditional_dict = text_encoder(text_prompts=[prompt])
+     for key, value in conditional_dict.items():
+         conditional_dict[key] = value.to(dtype=torch.float16)
+
+     rnd = torch.Generator(gpu).manual_seed(int(seed))
+     pipeline._initialize_kv_cache(1, torch.float16, device=gpu)
+     pipeline._initialize_crossattn_cache(1, torch.float16, device=gpu)
+     noise = torch.randn([1, 21, 16, 60, 104], device=gpu, dtype=torch.float16, generator=rnd)
+
+     vae_cache, latents_cache = None, None
+     if not APP_STATE["current_use_taehv"] and not args.trt:
+         vae_cache = [c.to(device=gpu, dtype=torch.float16) for c in ZERO_VAE_CACHE]
+
+     # Calculate number of blocks based on duration
+     # Current setup generates approximately 5 seconds with 7 blocks,
+     # so we scale proportionally
+     base_duration = 5.0  # seconds
+     base_blocks = 7
+     num_blocks = max(1, int(base_blocks * duration / base_duration))
+
+     current_start_frame = 0
+     all_num_frames = [pipeline.num_frame_per_block] * num_blocks
+
+     total_frames_yielded = 0
+
+     # Ensure temp directory exists
+     os.makedirs("gradio_tmp", exist_ok=True)
+
+     # Generation loop
+     for idx, current_num_frames in enumerate(all_num_frames):
+         print(f"📦 Processing block {idx+1}/{num_blocks}")
+
+         noisy_input = noise[:, current_start_frame : current_start_frame + current_num_frames]
+
+         # Denoising steps
+         for step_idx, current_timestep in enumerate(pipeline.denoising_step_list):
+             timestep = torch.ones([1, current_num_frames], device=noise.device, dtype=torch.int64) * current_timestep
+             _, denoised_pred = pipeline.generator(
+                 noisy_image_or_video=noisy_input, conditional_dict=conditional_dict,
+                 timestep=timestep, kv_cache=pipeline.kv_cache1,
+                 crossattn_cache=pipeline.crossattn_cache,
+                 current_start=current_start_frame * pipeline.frame_seq_length
+             )
+             if step_idx < len(pipeline.denoising_step_list) - 1:
+                 next_timestep = pipeline.denoising_step_list[step_idx + 1]
+                 noisy_input = pipeline.scheduler.add_noise(
+                     denoised_pred.flatten(0, 1), torch.randn_like(denoised_pred.flatten(0, 1)),
+                     next_timestep * torch.ones([1 * current_num_frames], device=noise.device, dtype=torch.long)
+                 ).unflatten(0, denoised_pred.shape[:2])
+
+         if idx < len(all_num_frames) - 1:
+             pipeline.generator(
+                 noisy_image_or_video=denoised_pred, conditional_dict=conditional_dict,
+                 timestep=torch.zeros_like(timestep), kv_cache=pipeline.kv_cache1,
+                 crossattn_cache=pipeline.crossattn_cache,
+                 current_start=current_start_frame * pipeline.frame_seq_length,
+             )
+
+         # Decode to pixels
+         if args.trt:
+             pixels, vae_cache = pipeline.vae.forward(denoised_pred.half(), *vae_cache)
+         elif APP_STATE["current_use_taehv"]:
+             if latents_cache is None:
+                 latents_cache = denoised_pred
+             else:
+                 denoised_pred = torch.cat([latents_cache, denoised_pred], dim=1)
+                 latents_cache = denoised_pred[:, -3:]
+             pixels = pipeline.vae.decode(denoised_pred)
+         else:
+             pixels, vae_cache = pipeline.vae(denoised_pred.half(), *vae_cache)
+
+         # Handle frame skipping
+         if idx == 0 and not args.trt:
+             pixels = pixels[:, 3:]
+         elif APP_STATE["current_use_taehv"] and idx > 0:
+             pixels = pixels[:, 12:]
+
+         print(f"🔍 DEBUG Block {idx}: Pixels shape after skipping: {pixels.shape}")
+
+         # Process all frames from this block at once
+         all_frames_from_block = []
+         for frame_idx in range(pixels.shape[1]):
+             frame_tensor = pixels[0, frame_idx]
+
+             # Convert to numpy (HWC, RGB, uint8)
+             frame_np = torch.clamp(frame_tensor.float(), -1., 1.) * 127.5 + 127.5
+             frame_np = frame_np.to(torch.uint8).cpu().numpy()
+             frame_np = np.transpose(frame_np, (1, 2, 0))  # CHW -> HWC
+
+             all_frames_from_block.append(frame_np)
+             total_frames_yielded += 1
+
+             # Yield status update for each frame (cute tracking!)
+             blocks_completed = idx
+             current_block_progress = (frame_idx + 1) / pixels.shape[1]
+             total_progress = (blocks_completed + current_block_progress) / num_blocks * 100
+
+             # Cap at 100% to avoid going over
+             total_progress = min(total_progress, 100.0)
+
+             frame_status_html = (
+                 f"<div style='padding: 10px; border: 1px solid #ddd; border-radius: 8px; font-family: sans-serif;'>"
+                 f" <p style='margin: 0 0 8px 0; font-size: 16px; font-weight: bold;'>Generating Video...</p>"
+                 f" <div style='background: #e9ecef; border-radius: 4px; width: 100%; overflow: hidden;'>"
+                 f" <div style='width: {total_progress:.1f}%; height: 20px; background-color: #0d6efd; transition: width 0.2s;'></div>"
+                 f" </div>"
+                 f" <p style='margin: 8px 0 0 0; color: #555; font-size: 14px; text-align: right;'>"
+                 f" Block {idx+1}/{num_blocks} | Frame {total_frames_yielded} | {total_progress:.1f}%"
+                 f" </p>"
+                 f"</div>"
+             )
+
+             # Yield None for video but update status (frame-by-frame tracking)
+             yield None, frame_status_html
+
+         # Encode entire block as one chunk
+         if all_frames_from_block:
+             print(f"📹 Encoding block {idx} with {len(all_frames_from_block)} frames")
+
+             try:
+                 chunk_uuid = str(uuid.uuid4())[:8]
+                 ts_filename = f"block_{idx:04d}_{chunk_uuid}.ts"
+                 ts_path = os.path.join("gradio_tmp", ts_filename)
+
+                 frames_to_ts_file(all_frames_from_block, ts_path, fps)
+
+                 # Calculate final progress for this block
+                 total_progress = (idx + 1) / num_blocks * 100
+
+                 # Yield the actual video chunk
+                 yield ts_path, gr.update()
+
+             except Exception as e:
+                 print(f"⚠️ Error encoding block {idx}: {e}")
+                 import traceback
+                 traceback.print_exc()
+
+         current_start_frame += current_num_frames
+
+     # Final completion status
+     final_status_html = (
+         f"<div style='padding: 16px; border: 1px solid #198754; background: linear-gradient(135deg, #d1e7dd, #f8f9fa); border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);'>"
+         f" <div style='display: flex; align-items: center; margin-bottom: 8px;'>"
+         f" <span style='font-size: 24px; margin-right: 12px;'>🎉</span>"
+         f" <h4 style='margin: 0; color: #0f5132; font-size: 18px;'>Stream Complete!</h4>"
+         f" </div>"
+         f" <div style='background: rgba(255,255,255,0.7); padding: 8px; border-radius: 4px;'>"
+         f" <p style='margin: 0; color: #0f5132; font-weight: 500;'>"
+         f" 📊 Generated {total_frames_yielded} frames across {num_blocks} blocks"
+         f" </p>"
+         f" <p style='margin: 4px 0 0 0; color: #0f5132; font-size: 14px;'>"
+         f" 🎬 Playback: {fps} FPS • 📁 Format: MPEG-TS/H.264"
+         f" </p>"
+         f" </div>"
+         f"</div>"
+     )
+     yield None, final_status_html
+     print(f"✅ PyAV streaming complete! {total_frames_yielded} frames across {num_blocks} blocks")
+
+ # --- Gradio UI Layout ---
+ with gr.Blocks(title="Wan2.1 1.3B LoRA Self-Forcing streaming demo") as demo:
+     gr.Markdown("# 🚀 Run Any LoRA in near real-time!")
+     gr.Markdown("Real-time video generation with distilled Wan2.1 1.3B and LoRA [[Model]](https://huggingface.co/gdhe17/Self-Forcing), [[Project page]](https://self-forcing.github.io), [[Paper]](https://huggingface.co/papers/2506.08009)")
+
+     with gr.Tabs():
+         # LoRA Upload Tab
+         with gr.TabItem("1️⃣ Upload LoRA"):
+             gr.Markdown("## Upload LoRA Weights")
+             gr.Markdown("Upload your custom LoRA weights file to use for generation. The file will be automatically stored and you'll receive a unique hash-based ID.")
+
+             with gr.Row():
+                 lora_file = gr.File(label="LoRA File (safetensors format)")
+
+             with gr.Row():
+                 lora_id_output = gr.Textbox(label="LoRA Hash ID (use this in the generation tab)", interactive=False)
+
+         # Video Generation Tab
+         with gr.TabItem("2️⃣ Generate Video"):
+             with gr.Row():
+                 with gr.Column(scale=2):
+                     with gr.Group():
+                         prompt = gr.Textbox(
+                             label="Prompt",
+                             placeholder="A stylish woman walks down a Tokyo street...",
+                             lines=4,
+                             value=""
+                         )
+
+                     start_btn = gr.Button("🎬 Start Streaming", variant="primary", size="lg")
+
+                     gr.Markdown("### ⚙️ Settings")
+                     with gr.Row():
+                         seed = gr.Number(
+                             label="Seed",
+                             value=-1,
+                             info="Use -1 for random seed",
+                             precision=0
+                         )
+                         fps = gr.Slider(
+                             label="Playback FPS",
+                             minimum=1,
+                             maximum=30,
+                             value=args.fps,
+                             step=1,
+                             visible=False,
+                             info="Frames per second for playback"
+                         )
+
+                     with gr.Row():
+                         duration = gr.Slider(
+                             label="Duration (seconds)",
+                             minimum=1,
+                             maximum=5,
+                             value=3,
+                             step=1,
+                             info="Video duration in seconds"
+                         )
+
+                     with gr.Row():
+                         width = gr.Slider(
+                             label="Width",
+                             minimum=224,
+                             maximum=720,
+                             value=400,
+                             step=8,
+                             info="Video width in pixels (8px steps)"
+                         )
+                         height = gr.Slider(
+                             label="Height",
+                             minimum=224,
+                             maximum=720,
+                             value=224,
+                             step=8,
+                             info="Video height in pixels (8px steps)"
+                         )
+
+                     gr.Markdown("### 🎨 LoRA Settings")
+                     lora_id = gr.Textbox(
+                         label="LoRA ID (from upload tab)",
+                         placeholder="Enter your LoRA ID here...",
+                     )
+
+                     lora_weight = gr.Slider(
+                         label="LoRA Weight",
+                         minimum=0.0,
+                         maximum=1.0,
+                         step=0.01,
+                         value=1.0,
+                         info="Strength of LoRA influence"
+                     )
+
+                 with gr.Column(scale=3):
+                     gr.Markdown("### 📺 Video Stream")
+
+                     streaming_video = gr.Video(
+                         label="Live Stream",
+                         streaming=True,
+                         loop=True,
+                         height=400,
+                         autoplay=True,
+                         show_label=False
+                     )
+
+                     status_display = gr.HTML(
+                         value=(
+                             "<div style='text-align: center; padding: 20px; color: #666; border: 1px dashed #ddd; border-radius: 8px;'>"
+                             "🎬 Ready to start streaming...<br>"
+                             "<small>Configure your prompt and click 'Start Streaming'</small>"
+                             "</div>"
+                         ),
+                         label="Generation Status"
+                     )
+
+     # Connect the generator to the streaming video
+     start_btn.click(
+         fn=video_generation_handler_streaming,
+         inputs=[prompt, seed, fps, width, height, duration, lora_id, lora_weight],
+         outputs=[streaming_video, status_display]
+     )
+
+     # Connect LoRA upload to both display fields
+     lora_file.change(
+         fn=upload_lora_file,
+         inputs=[lora_file],
+         outputs=[lora_id_output, lora_id]
+     )
+
+
+ # --- Launch App ---
+ if __name__ == "__main__":
+     if os.path.exists("gradio_tmp"):
+         import shutil
+         shutil.rmtree("gradio_tmp")
+     os.makedirs("gradio_tmp", exist_ok=True)
+
+     print("🚀 Starting Self-Forcing Streaming Demo")
+     print(f"📁 Temporary files will be stored in: gradio_tmp/")
+     print(f"🎯 Chunk encoding: PyAV (MPEG-TS/H.264)")
+     print(f"⚡ GPU acceleration: {gpu}")
+
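+     # mcp_server=True assumes a Gradio build with MCP support (the gradio[mcp] extra);
+     # launch() on older Gradio versions does not accept this argument.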
+     demo.queue().launch(
+         server_name=args.host,
+         server_port=args.port,
+         share=args.share,
+         show_error=True,
+         max_threads=40,
+         mcp_server=True
+     )