Commit b91a6aa by jbilcke-hf · Parent: 5b7dcad

debugging the preview tab

Files changed (3):
  1. vms/config.py +40 -1
  2. vms/services/previewing.py +110 -44
  3. vms/tabs/preview_tab.py +330 -27
vms/config.py CHANGED
@@ -58,7 +58,6 @@ if NORMALIZE_IMAGES_TO not in ['png', 'jpg']:
     raise ValueError("NORMALIZE_IMAGES_TO must be either 'png' or 'jpg'")
 JPEG_QUALITY = int(os.environ.get('JPEG_QUALITY', '97'))
 
-# Expanded model types to include Wan-2.1-T2V
 MODEL_TYPES = {
     "HunyuanVideo": "hunyuan_video",
     "LTX-Video": "ltx_video",
@@ -71,6 +70,46 @@ TRAINING_TYPES = {
     "Full Finetune": "full-finetune"
 }
 
+# Model variants for each model type
+MODEL_VARIANTS = {
+    "wan": {
+        "Wan-AI/Wan2.1-T2V-1.3B-Diffusers": {
+            "name": "Wan 2.1 T2V 1.3B (text-only, smaller)",
+            "type": "text-to-video",
+            "description": "Faster, smaller model (1.3B parameters)"
+        },
+        "Wan-AI/Wan2.1-T2V-14B-Diffusers": {
+            "name": "Wan 2.1 T2V 14B (text-only, larger)",
+            "type": "text-to-video",
+            "description": "Higher quality but slower (14B parameters)"
+        },
+        "Wan-AI/Wan2.1-I2V-14B-480P-Diffusers": {
+            "name": "Wan 2.1 I2V 480p (image+text)",
+            "type": "image-to-video",
+            "description": "Image conditioning at 480p resolution"
+        },
+        "Wan-AI/Wan2.1-I2V-14B-720P-Diffusers": {
+            "name": "Wan 2.1 I2V 720p (image+text)",
+            "type": "image-to-video",
+            "description": "Image conditioning at 720p resolution"
+        }
+    },
+    "ltx_video": {
+        "Lightricks/LTX-Video": {
+            "name": "LTX Video (official)",
+            "type": "text-to-video",
+            "description": "Official LTX Video model"
+        }
+    },
+    "hunyuan_video": {
+        "hunyuanvideo-community/HunyuanVideo": {
+            "name": "Hunyuan Video (official)",
+            "type": "text-to-video",
+            "description": "Official Hunyuan Video model"
+        }
+    }
+}
+
 DEFAULT_SEED = 42
 
 DEFAULT_REMOVE_COMMON_LLM_CAPTION_PREFIXES = True
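
For illustration only (not part of this commit), a minimal sketch of how the new MODEL_VARIANTS table can be queried; the helper name variants_by_type is hypothetical, and the import assumes the dict lands in vms.config exactly as added above:

# Hypothetical helper around the MODEL_VARIANTS table added above.
from vms.config import MODEL_VARIANTS

def variants_by_type(model_family: str, conditioning: str) -> dict:
    """Return the variants of one family ("wan", "ltx_video", "hunyuan_video")
    whose "type" field matches the requested conditioning mode."""
    return {
        repo_id: info
        for repo_id, info in MODEL_VARIANTS.get(model_family, {}).items()
        if info.get("type") == conditioning
    }

# Example: only the image-to-video Wan checkpoints need a conditioning image.
print(list(variants_by_type("wan", "image-to-video")))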
vms/services/previewing.py CHANGED
@@ -6,13 +6,13 @@ Handles the video generation logic and model integration
 
 import logging
 import tempfile
-import torch
 from pathlib import Path
 from typing import Dict, Any, List, Optional, Tuple, Callable
+import time
 
 from vms.config import (
     OUTPUT_PATH, STORAGE_PATH, MODEL_TYPES, TRAINING_PATH,
-    DEFAULT_PROMPT_PREFIX
+    DEFAULT_PROMPT_PREFIX, MODEL_VARIANTS
 )
 from vms.utils import format_time
 
@@ -48,9 +48,14 @@ class PreviewingService:
             logger.error(f"Error finding LoRA weights: {e}")
             return None
 
+    def get_model_variants(self, model_type: str) -> Dict[str, Dict[str, str]]:
+        """Get available model variants for the given model type"""
+        return MODEL_VARIANTS.get(model_type, {})
+
     def generate_video(
         self,
         model_type: str,
+        model_variant: str,
         prompt: str,
         negative_prompt: str,
         prompt_prefix: str,
@@ -62,7 +67,8 @@
         lora_weight: float,
         inference_steps: int,
         enable_cpu_offload: bool,
-        fps: int
+        fps: int,
+        conditioning_image: Optional[str] = None
     ) -> Tuple[Optional[str], str, str]:
        """Generate a video using the trained model"""
        try:
@@ -71,6 +77,7 @@
            def log(msg: str):
                log_messages.append(msg)
                logger.info(msg)
+               # Return updated log string for UI updates
                return "\n".join(log_messages)
 
            # Find latest LoRA weights
@@ -95,7 +102,30 @@
            if not internal_model_type:
                return None, f"Error: Invalid model type {model_type}", log(f"Error: Invalid model type {model_type}")
 
+           # Check if model variant is valid for this model type
+           variants = self.get_model_variants(internal_model_type)
+           if model_variant not in variants:
+               # Use default variant if specified one is invalid
+               if len(variants) > 0:
+                   model_variant = next(iter(variants.keys()))
+                   log(f"Warning: Invalid model variant, using default: {model_variant}")
+               else:
+                   # Fall back to default IDs if no variants defined
+                   if internal_model_type == "wan":
+                       model_variant = "Wan-AI/Wan2.1-T2V-1.3B-Diffusers"
+                   elif internal_model_type == "ltx_video":
+                       model_variant = "Lightricks/LTX-Video"
+                   elif internal_model_type == "hunyuan_video":
+                       model_variant = "hunyuanvideo-community/HunyuanVideo"
+                   log(f"Warning: No variants defined for model type, using default: {model_variant}")
+
+           # Check if this is an image-to-video model but no image was provided
+           variant_info = variants.get(model_variant, {})
+           if variant_info.get("type") == "image-to-video" and not conditioning_image:
+               return None, "Error: This model requires a conditioning image", log("Error: This model variant requires a conditioning image but none was provided")
+
            log(f"Generating video with model type: {internal_model_type}")
+           log(f"Using model variant: {model_variant}")
            log(f"Using LoRA weights from: {lora_path}")
            log(f"Resolution: {width}x{height}, Frames: {num_frames}, FPS: {fps}")
            log(f"Guidance Scale: {guidance_scale}, Flow Shift: {flow_shift}, LoRA Weight: {lora_weight}")
@@ -107,19 +137,22 @@
                return self.generate_wan_video(
                    full_prompt, negative_prompt, width, height, num_frames,
                    guidance_scale, flow_shift, lora_path, lora_weight,
-                   inference_steps, enable_cpu_offload, fps, log
+                   inference_steps, enable_cpu_offload, fps, log,
+                   model_variant, conditioning_image
                )
            elif internal_model_type == "ltx_video":
                return self.generate_ltx_video(
                    full_prompt, negative_prompt, width, height, num_frames,
                    guidance_scale, flow_shift, lora_path, lora_weight,
-                   inference_steps, enable_cpu_offload, fps, log
+                   inference_steps, enable_cpu_offload, fps, log,
+                   model_variant, conditioning_image
                )
            elif internal_model_type == "hunyuan_video":
                return self.generate_hunyuan_video(
                    full_prompt, negative_prompt, width, height, num_frames,
                    guidance_scale, flow_shift, lora_path, lora_weight,
-                   inference_steps, enable_cpu_offload, fps, log
+                   inference_steps, enable_cpu_offload, fps, log,
+                   model_variant, conditioning_image
                )
            else:
                return None, f"Error: Unsupported model type {internal_model_type}", log(f"Error: Unsupported model type {internal_model_type}")
@@ -142,28 +175,31 @@
        inference_steps: int,
        enable_cpu_offload: bool,
        fps: int,
-       log_fn: Callable
+       log_fn: Callable,
+       model_variant: str = "Wan-AI/Wan2.1-T2V-1.3B-Diffusers",
+       conditioning_image: Optional[str] = None
    ) -> Tuple[Optional[str], str, str]:
        """Generate video using Wan model"""
-       start_time = torch.cuda.Event(enable_timing=True)
-       end_time = torch.cuda.Event(enable_timing=True)
-
+
        try:
            import torch
            from diffusers import AutoencoderKLWan, WanPipeline
            from diffusers.schedulers.scheduling_unipc_multistep import UniPCMultistepScheduler
            from diffusers.utils import export_to_video
+           from PIL import Image
+           import os
+
+           start_time = torch.cuda.Event(enable_timing=True)
+           end_time = torch.cuda.Event(enable_timing=True)
 
            log_fn("Importing Wan model components...")
 
-           # Use the smaller model for faster inference
-           model_id = "Wan-AI/Wan2.1-T2V-1.3B-Diffusers"
-
-           log_fn(f"Loading VAE from {model_id}...")
-           vae = AutoencoderKLWan.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float32)
-
-           log_fn(f"Loading transformer from {model_id}...")
-           pipe = WanPipeline.from_pretrained(model_id, vae=vae, torch_dtype=torch.bfloat16)
+           log_fn(f"Loading VAE from {model_variant}...")
+           vae = AutoencoderKLWan.from_pretrained(model_variant, subfolder="vae", torch_dtype=torch.float32)
+
+           log_fn(f"Loading transformer from {model_variant}...")
+           pipe = WanPipeline.from_pretrained(model_variant, vae=vae, torch_dtype=torch.bfloat16)
 
            log_fn(f"Configuring scheduler with flow_shift={flow_shift}...")
            pipe.scheduler = UniPCMultistepScheduler.from_config(
@@ -189,15 +225,36 @@
            log_fn("Starting video generation...")
            start_time.record()
 
-           output = pipe(
-               prompt=prompt,
-               negative_prompt=negative_prompt,
-               height=height,
-               width=width,
-               num_frames=num_frames,
-               guidance_scale=guidance_scale,
-               num_inference_steps=inference_steps,
-           ).frames[0]
+           # Check if this is an image-to-video model
+           is_i2v = "I2V" in model_variant
+
+           if is_i2v and conditioning_image:
+               log_fn(f"Loading conditioning image from {conditioning_image}...")
+               image = Image.open(conditioning_image).convert("RGB")
+               image = image.resize((width, height))
+
+               log_fn("Generating video with image conditioning...")
+               output = pipe(
+                   prompt=prompt,
+                   negative_prompt=negative_prompt,
+                   image=image,
+                   height=height,
+                   width=width,
+                   num_frames=num_frames,
+                   guidance_scale=guidance_scale,
+                   num_inference_steps=inference_steps,
+               ).frames[0]
+           else:
+               log_fn("Generating video with text-only conditioning...")
+               output = pipe(
+                   prompt=prompt,
+                   negative_prompt=negative_prompt,
+                   height=height,
+                   width=width,
+                   num_frames=num_frames,
+                   guidance_scale=guidance_scale,
+                   num_inference_steps=inference_steps,
+               ).frames[0]
 
            end_time.record()
            torch.cuda.synchronize()
@@ -236,23 +293,25 @@
        inference_steps: int,
        enable_cpu_offload: bool,
        fps: int,
-       log_fn: Callable
+       log_fn: Callable,
+       model_variant: str = "Lightricks/LTX-Video",
+       conditioning_image: Optional[str] = None
    ) -> Tuple[Optional[str], str, str]:
        """Generate video using LTX model"""
-       start_time = torch.cuda.Event(enable_timing=True)
-       end_time = torch.cuda.Event(enable_timing=True)
-
+
        try:
            import torch
            from diffusers import LTXPipeline
            from diffusers.utils import export_to_video
+           from PIL import Image
 
+           start_time = torch.cuda.Event(enable_timing=True)
+           end_time = torch.cuda.Event(enable_timing=True)
+
            log_fn("Importing LTX model components...")
 
-           model_id = "Lightricks/LTX-Video"
-
-           log_fn(f"Loading pipeline from {model_id}...")
-           pipe = LTXPipeline.from_pretrained(model_id, torch_dtype=torch.bfloat16)
+           log_fn(f"Loading pipeline from {model_variant}...")
+           pipe = LTXPipeline.from_pretrained(model_variant, torch_dtype=torch.bfloat16)
 
            log_fn("Moving pipeline to CUDA device...")
            pipe.to("cuda")
@@ -272,6 +331,7 @@
            log_fn("Starting video generation...")
            start_time.record()
 
+           # LTX doesn't currently support image conditioning in the standard way
            video = pipe(
                prompt=prompt,
                negative_prompt=negative_prompt,
@@ -321,31 +381,33 @@
        inference_steps: int,
        enable_cpu_offload: bool,
        fps: int,
-       log_fn: Callable
+       log_fn: Callable,
+       model_variant: str = "hunyuanvideo-community/HunyuanVideo",
+       conditioning_image: Optional[str] = None
    ) -> Tuple[Optional[str], str, str]:
        """Generate video using HunyuanVideo model"""
-       start_time = torch.cuda.Event(enable_timing=True)
-       end_time = torch.cuda.Event(enable_timing=True)
+
 
        try:
            import torch
            from diffusers import HunyuanVideoPipeline, HunyuanVideoTransformer3DModel, AutoencoderKLHunyuanVideo
            from diffusers.utils import export_to_video
 
+           start_time = torch.cuda.Event(enable_timing=True)
+           end_time = torch.cuda.Event(enable_timing=True)
+
            log_fn("Importing HunyuanVideo model components...")
 
-           model_id = "hunyuanvideo-community/HunyuanVideo"
-
-           log_fn(f"Loading transformer from {model_id}...")
+           log_fn(f"Loading transformer from {model_variant}...")
            transformer = HunyuanVideoTransformer3DModel.from_pretrained(
-               model_id,
+               model_variant,
                subfolder="transformer",
                torch_dtype=torch.bfloat16
            )
 
-           log_fn(f"Loading pipeline from {model_id}...")
+           log_fn(f"Loading pipeline from {model_variant}...")
            pipe = HunyuanVideoPipeline.from_pretrained(
-               model_id,
+               model_variant,
                transformer=transformer,
                torch_dtype=torch.float16
            )
@@ -371,9 +433,13 @@
            log_fn("Starting video generation...")
            start_time.record()
 
+           # Fix for Issue #2: The pipe() expected list rather than float
+           # Make sure negative_prompt is a list or None
+           neg_prompt = [negative_prompt] if negative_prompt else None
+
            output = pipe(
                prompt=prompt,
-               negative_prompt=negative_prompt if negative_prompt else None,
+               negative_prompt=neg_prompt,
                height=height,
                width=width,
                num_frames=num_frames,
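
The variant validation added to generate_video above boils down to a small rule: keep the requested variant if it is registered, otherwise fall back to the first registered variant, otherwise to a hard-coded default repo id. A standalone sketch for reference (the function name resolve_variant is hypothetical; the fallback ids are the ones used in the diff):

from typing import Dict, Tuple

# Hard-coded fallbacks mirroring the ones in PreviewingService.generate_video above.
HARDCODED_DEFAULTS = {
    "wan": "Wan-AI/Wan2.1-T2V-1.3B-Diffusers",
    "ltx_video": "Lightricks/LTX-Video",
    "hunyuan_video": "hunyuanvideo-community/HunyuanVideo",
}

def resolve_variant(internal_model_type: str, requested: str, variants: Dict[str, dict]) -> Tuple[str, str]:
    """Return (variant_id, warning); the warning is empty when the requested variant is valid."""
    if requested in variants:
        return requested, ""
    if variants:
        default = next(iter(variants))
        return default, f"Warning: Invalid model variant, using default: {default}"
    default = HARDCODED_DEFAULTS.get(internal_model_type, "")
    return default, f"Warning: No variants defined for model type, using default: {default}"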
vms/tabs/preview_tab.py CHANGED
@@ -6,9 +6,10 @@ import gradio as gr
 import logging
 from pathlib import Path
 from typing import Dict, Any, List, Optional, Tuple
+import time
 
-from vms.tabs import BaseTab
-from vms.config import (
+from .base_tab import BaseTab
+from ..config import (
     MODEL_TYPES, DEFAULT_PROMPT_PREFIX
 )
 
@@ -21,10 +22,7 @@ class PreviewTab(BaseTab):
        super().__init__(app_state)
        self.id = "preview_tab"
        self.title = "6️⃣ Preview"
-
-       # Get reference to the preview service from app_state
-       self.previewing_service = app_state.previewing
-
+
    def create(self, parent=None) -> gr.TabItem:
        """Create the Preview tab UI components"""
        with gr.TabItem(self.title, id=self.id) as tab:
@@ -53,12 +51,32 @@
                )
 
                with gr.Row():
+                   # Get the currently selected model type from training tab if possible
+                   default_model = self.get_default_model_type()
+
+                   # Make model_type read-only (disabled), as it must match what was trained
                    self.components["model_type"] = gr.Dropdown(
                        choices=list(MODEL_TYPES.keys()),
-                       label="Model Type",
-                       value=list(MODEL_TYPES.keys())[0]
+                       label="Model Type (from training)",
+                       value=default_model,
+                       interactive=False
                    )
 
+                   # Add model variant selection based on model type
+                   self.components["model_variant"] = gr.Dropdown(
+                       label="Model Variant",
+                       choices=self.get_variant_choices(default_model),
+                       value=self.get_default_variant(default_model)
+                   )
+
+                   # Add image input for image-to-video models
+                   self.components["conditioning_image"] = gr.Image(
+                       label="Conditioning Image (for Image-to-Video models)",
+                       type="filepath",
+                       visible=False
+                   )
+
+               with gr.Row():
                    self.components["resolution_preset"] = gr.Dropdown(
                        choices=["480p", "720p"],
                        label="Resolution Preset",
@@ -150,15 +168,89 @@
                        interactive=False
                    )
 
-               with gr.Accordion("Log", open=False):
+               with gr.Accordion("Log", open=True):
                    self.components["log"] = gr.TextArea(
                        label="Generation Log",
                        interactive=False,
-                       lines=10
+                       lines=15
                    )
 
        return tab
 
+   def get_variant_choices(self, model_type: str) -> List[str]:
+       """Get model variant choices based on model type"""
+       # Convert UI display name to internal name
+       internal_type = MODEL_TYPES.get(model_type)
+       if not internal_type:
+           return []
+
+       # Get variants from preview service
+       variants = self.app.previewing.get_model_variants(internal_type)
+       if not variants:
+           return []
+
+       # Format choices with display name and description
+       choices = []
+       for model_id, info in variants.items():
+           choices.append(f"{model_id} - {info.get('name', '')}")
+
+       return choices
+
+   def get_default_variant(self, model_type: str) -> str:
+       """Get default model variant for the model type"""
+       choices = self.get_variant_choices(model_type)
+       if choices:
+           return choices[0]
+       return ""
+
+   def get_default_model_type(self) -> str:
+       """Get the currently selected model type from training tab"""
+       try:
+           # Try to get the model type from UI state
+           ui_state = self.app.training.load_ui_state()
+           model_type = ui_state.get("model_type")
+
+           # Make sure it's a valid model type
+           if model_type in MODEL_TYPES:
+               return model_type
+
+           # If we couldn't get a valid model type, try to get it from the training tab directly
+           if hasattr(self.app, 'tabs') and 'train_tab' in self.app.tabs:
+               train_tab = self.app.tabs['train_tab']
+               if hasattr(train_tab, 'components') and 'model_type' in train_tab.components:
+                   train_model_type = train_tab.components['model_type'].value
+                   if train_model_type in MODEL_TYPES:
+                       return train_model_type
+
+           # Fallback to first model type
+           return list(MODEL_TYPES.keys())[0]
+       except Exception as e:
+           logger.warning(f"Failed to get default model type: {e}")
+           return list(MODEL_TYPES.keys())[0]
+
+   def extract_model_id(self, variant_choice: str) -> str:
+       """Extract model ID from variant choice string"""
+       if " - " in variant_choice:
+           return variant_choice.split(" - ")[0].strip()
+       return variant_choice
+
+   def get_variant_type(self, model_type: str, model_variant: str) -> str:
+       """Get the variant type (text-to-video or image-to-video)"""
+       # Convert UI display name to internal name
+       internal_type = MODEL_TYPES.get(model_type)
+       if not internal_type:
+           return "text-to-video"
+
+       # Extract model_id from variant choice
+       model_id = self.extract_model_id(model_variant)
+
+       # Get variants from preview service
+       variants = self.app.previewing.get_model_variants(internal_type)
+       variant_info = variants.get(model_id, {})
+
+       # Return the variant type or default to text-to-video
+       return variant_info.get("type", "text-to-video")
+
    def connect_events(self) -> None:
        """Connect event handlers to UI components"""
        # Update resolution when preset changes
@@ -172,11 +264,70 @@
            ]
        )
 
+       # Update model_variant choices when model_type changes or tab is selected
+       if hasattr(self.app, 'tabs_component') and self.app.tabs_component is not None:
+           self.app.tabs_component.select(
+               fn=self.sync_model_type_and_variants,
+               inputs=[],
+               outputs=[
+                   self.components["model_type"],
+                   self.components["model_variant"]
+               ]
+           )
+
+       # Update variant-specific UI elements when variant changes
+       self.components["model_variant"].change(
+           fn=self.update_variant_ui,
+           inputs=[
+               self.components["model_type"],
+               self.components["model_variant"]
+           ],
+           outputs=[
+               self.components["conditioning_image"]
+           ]
+       )
+
+       # Load preview UI state when the tab is selected
+       if hasattr(self.app, 'tabs_component') and self.app.tabs_component is not None:
+           self.app.tabs_component.select(
+               fn=self.load_preview_state,
+               inputs=[],
+               outputs=[
+                   self.components["prompt"],
+                   self.components["negative_prompt"],
+                   self.components["prompt_prefix"],
+                   self.components["width"],
+                   self.components["height"],
+                   self.components["num_frames"],
+                   self.components["fps"],
+                   self.components["guidance_scale"],
+                   self.components["flow_shift"],
+                   self.components["lora_weight"],
+                   self.components["inference_steps"],
+                   self.components["enable_cpu_offload"],
+                   self.components["model_variant"]
+               ]
+           )
+
+       # Save preview UI state when values change
+       for component_name in [
+           "prompt", "negative_prompt", "prompt_prefix", "model_variant", "resolution_preset",
+           "width", "height", "num_frames", "fps", "guidance_scale", "flow_shift",
+           "lora_weight", "inference_steps", "enable_cpu_offload"
+       ]:
+           if component_name in self.components:
+               self.components[component_name].change(
+                   fn=self.save_preview_state_value,
+                   inputs=[self.components[component_name]],
+                   outputs=[]
+               )
+
        # Generate button click
        self.components["generate_btn"].click(
            fn=self.generate_video,
            inputs=[
                self.components["model_type"],
+               self.components["model_variant"],
                self.components["prompt"],
                self.components["negative_prompt"],
                self.components["prompt_prefix"],
@@ -188,7 +339,8 @@
                self.components["lora_weight"],
                self.components["inference_steps"],
                self.components["enable_cpu_offload"],
-               self.components["fps"]
+               self.components["fps"],
+               self.components["conditioning_image"]
            ],
            outputs=[
                self.components["preview_video"],
@@ -197,6 +349,23 @@
            ]
        )
 
+   def update_variant_ui(self, model_type: str, model_variant: str) -> Dict[str, Any]:
+       """Update UI based on the selected model variant"""
+       variant_type = self.get_variant_type(model_type, model_variant)
+
+       # Show conditioning image input only for image-to-video models
+       show_conditioning_image = variant_type == "image-to-video"
+
+       return {
+           self.components["conditioning_image"]: gr.Image(visible=show_conditioning_image)
+       }
+
+   def sync_model_type_and_variants(self) -> Tuple[str, str]:
+       """Sync model type with training tab when preview tab is selected and update variant choices"""
+       model_type = self.get_default_model_type()
+       model_variant = self.get_default_variant(model_type)
+       return model_type, model_variant
+
    def update_resolution(self, preset: str) -> Tuple[int, int, float]:
        """Update resolution and flow shift based on preset"""
        if preset == "480p":
@@ -206,9 +375,88 @@
        else:
            return 832, 480, 3.0
 
+   def load_preview_state(self) -> Tuple:
+       """Load saved preview UI state"""
+       # Try to get the saved state
+       try:
+           state = self.app.training.load_ui_state()
+           preview_state = state.get("preview", {})
+
+           # Get model type (can't be changed in UI)
+           model_type = self.get_default_model_type()
+
+           # If model_variant not in choices for current model_type, use default
+           model_variant = preview_state.get("model_variant", "")
+           variant_choices = self.get_variant_choices(model_type)
+           if model_variant not in variant_choices and variant_choices:
+               model_variant = variant_choices[0]
+
+           return (
+               preview_state.get("prompt", ""),
+               preview_state.get("negative_prompt", "worst quality, low quality, blurry, jittery, distorted, ugly, deformed, disfigured, messy background"),
+               preview_state.get("prompt_prefix", DEFAULT_PROMPT_PREFIX),
+               preview_state.get("width", 832),
+               preview_state.get("height", 480),
+               preview_state.get("num_frames", 49),
+               preview_state.get("fps", 16),
+               preview_state.get("guidance_scale", 5.0),
+               preview_state.get("flow_shift", 3.0),
+               preview_state.get("lora_weight", 0.7),
+               preview_state.get("inference_steps", 30),
+               preview_state.get("enable_cpu_offload", True),
+               model_variant
+           )
+       except Exception as e:
+           logger.error(f"Error loading preview state: {e}")
+           # Return defaults if loading fails
+           return (
+               "",
+               "worst quality, low quality, blurry, jittery, distorted, ugly, deformed, disfigured, messy background",
+               DEFAULT_PROMPT_PREFIX,
+               832, 480, 49, 16, 5.0, 3.0, 0.7, 30, True,
+               self.get_default_variant(self.get_default_model_type())
+           )
+
+   def save_preview_state_value(self, value: Any) -> None:
+       """Save an individual preview state value"""
+       try:
+           # Get the component name from the event context
+           import inspect
+           frame = inspect.currentframe()
+           frame = inspect.getouterframes(frame)[1]
+           event_context = frame.frame.f_locals
+           component = event_context.get('component')
+
+           if component is None:
+               return
+
+           # Find the component name
+           component_name = None
+           for name, comp in self.components.items():
+               if comp == component:
+                   component_name = name
+                   break
+
+           if component_name is None:
+               return
+
+           # Load current state
+           state = self.app.training.load_ui_state()
+           if "preview" not in state:
+               state["preview"] = {}
+
+           # Update the value
+           state["preview"][component_name] = value
+
+           # Save state
+           self.app.training.save_ui_state(state)
+       except Exception as e:
+           logger.error(f"Error saving preview state: {e}")
+
    def generate_video(
        self,
        model_type: str,
+       model_variant: str,
        prompt: str,
        negative_prompt: str,
        prompt_prefix: str,
@@ -220,21 +468,76 @@
        lora_weight: float,
        inference_steps: int,
        enable_cpu_offload: bool,
-       fps: int
+       fps: int,
+       conditioning_image: Optional[str] = None
    ) -> Tuple[Optional[str], str, str]:
        """Handler for generate button click, delegates to preview service"""
-       return self.preview_service.generate_video(
-           model_type=model_type,
-           prompt=prompt,
-           negative_prompt=negative_prompt,
-           prompt_prefix=prompt_prefix,
-           width=width,
-           height=height,
-           num_frames=num_frames,
-           guidance_scale=guidance_scale,
-           flow_shift=flow_shift,
-           lora_weight=lora_weight,
-           inference_steps=inference_steps,
-           enable_cpu_offload=enable_cpu_offload,
-           fps=fps
-       )
+       # Save all the parameters to preview state before generating
+       try:
+           state = self.app.training.load_ui_state()
+           if "preview" not in state:
+               state["preview"] = {}
+
+           # Extract model ID from variant choice
+           model_variant_id = self.extract_model_id(model_variant)
+
+           # Update all values
+           preview_state = {
+               "prompt": prompt,
+               "negative_prompt": negative_prompt,
+               "prompt_prefix": prompt_prefix,
+               "model_type": model_type,
+               "model_variant": model_variant,
+               "width": width,
+               "height": height,
+               "num_frames": num_frames,
+               "fps": fps,
+               "guidance_scale": guidance_scale,
+               "flow_shift": flow_shift,
+               "lora_weight": lora_weight,
+               "inference_steps": inference_steps,
+               "enable_cpu_offload": enable_cpu_offload
+           }
+
+           state["preview"] = preview_state
+           self.app.training.save_ui_state(state)
+       except Exception as e:
+           logger.error(f"Error saving preview state before generation: {e}")
+
+       # Clear the log display at the start to make room for new logs
+       # Yield and sleep briefly to allow UI update
+       yield None, "Starting generation...", ""
+       time.sleep(0.1)
+
+       # Extract model ID from variant choice string
+       model_variant_id = self.extract_model_id(model_variant)
+
+       # Use streaming updates to provide real-time feedback during generation
+       def generate_with_updates():
+           # Initial UI update
+           yield None, "Initializing generation...", "Starting video generation process..."
+
+           # Start actual generation
+           result = self.app.previewing.generate_video(
+               model_type=model_type,
+               model_variant=model_variant_id,
+               prompt=prompt,
+               negative_prompt=negative_prompt,
+               prompt_prefix=prompt_prefix,
+               width=width,
+               height=height,
+               num_frames=num_frames,
+               guidance_scale=guidance_scale,
+               flow_shift=flow_shift,
+               lora_weight=lora_weight,
+               inference_steps=inference_steps,
+               enable_cpu_offload=enable_cpu_offload,
+               fps=fps,
+               conditioning_image=conditioning_image
+           )
+
+           # Return final result
+           return result
+
+       # Return the generator for streaming updates
+       return generate_with_updates()
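
The model_variant dropdown stores each entry as "<repo id> - <display name>", and extract_model_id recovers the repo id by splitting on the first " - ". A standalone round-trip sketch of that convention (format_choice is hypothetical; the parsing matches PreviewTab.extract_model_id above):

def format_choice(repo_id: str, name: str) -> str:
    # Same "<repo id> - <name>" format that get_variant_choices builds.
    return f"{repo_id} - {name}"

def extract_model_id(variant_choice: str) -> str:
    # Same parsing as PreviewTab.extract_model_id in the diff above.
    if " - " in variant_choice:
        return variant_choice.split(" - ")[0].strip()
    return variant_choice

choice = format_choice("Wan-AI/Wan2.1-T2V-1.3B-Diffusers", "Wan 2.1 T2V 1.3B (text-only, smaller)")
assert extract_model_id(choice) == "Wan-AI/Wan2.1-T2V-1.3B-Diffusers"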