Spaces:

jbilcke-hf
/

VideoModelStudio

Running

App Files Community

jbilcke-hf HF Staff committed on Mar 16

Commit

9000726

1 Parent(s): 2bdf2d8

workaround for Finetrainers

Browse files

Files changed (4) hide show

finetrainers/data/dataset.py +56 -6
finetrainers/trainer/sft_trainer/trainer.py +14 -1
vms/ui/app_ui.py +5 -5
vms/ui/project/tabs/train_tab.py +4 -4

finetrainers/data/dataset.py CHANGED Viewed

@@ -970,9 +970,59 @@ def _preprocess_image(image: PIL.Image.Image) -> torch.Tensor:
     image = image.permute(2, 0, 1).contiguous() / 127.5 - 1.0
     return image
-def _preprocess_video(video: decord.VideoReader) -> torch.Tensor:
-    video = video.get_batch(list(range(len(video))))
-    video = video.permute(0, 3, 1, 2).contiguous()
-    video = video.float() / 127.5 - 1.0
-    return video

     image = image.permute(2, 0, 1).contiguous() / 127.5 - 1.0
     return image
+def _preprocess_video(video) -> torch.Tensor:
+    import torch
+    import numpy as np
+    # For decord VideoReader
+    if hasattr(video, 'get_batch') and 'decord' in str(type(video)):
+        video = video.get_batch(list(range(len(video))))
+        video = video.permute(0, 3, 1, 2).contiguous() / 127.5 - 1.0
+        return video
+    # For torchvision VideoReader
+    elif 'torchvision.io.video_reader' in str(type(video)):
+        # Use the correct iteration pattern for torchvision.io.VideoReader
+        frames = []
+        try:
+            # First seek to the beginning
+            video.seek(0)
+            # Then collect frames by iterating
+            for _ in range(30):  # Try to get a reasonable number of frames
+                try:
+                    frame_dict = next(video)
+                    frame = frame_dict["data"]  # Extract the tensor data from the dict
+                    frames.append(frame)
+                except StopIteration:
+                    break
+        except Exception as e:
+            print(f"Error iterating VideoReader: {e}")
+        if frames:
+            # In torchvision.io.VideoReader, frames are already in [C, H, W] format
+            # We need to stack and convert to [B, C, H, W]
+            stacked_frames = torch.stack(frames)
+            # Normalize to [-1, 1]
+            stacked_frames = stacked_frames.float() / 127.5 - 1.0
+            return stacked_frames
+        # If we couldn't get frames, create a dummy tensor
+        print("Failed to get frames, creating dummy tensor")
+        return torch.zeros(16, 3, 512, 768).float()
+    # For list of PIL images
+    elif isinstance(video, list) and len(video) > 0 and hasattr(video[0], 'convert'):
+        frames = []
+        for img in video:
+            img_tensor = torch.from_numpy(np.array(img.convert("RGB"))).float()
+            frames.append(img_tensor)
+        video = torch.stack(frames)
+        video = video.permute(0, 3, 1, 2).contiguous() / 127.5 - 1.0
+        return video
+    # Unknown type
+    else:
+        print(f"Unknown video type: {type(video)}")
+        return torch.zeros(16, 3, 512, 768).float()

finetrainers/trainer/sft_trainer/trainer.py CHANGED Viewed

@@ -325,8 +325,21 @@ class SFTTrainer:
         resume_from_checkpoint = self.args.resume_from_checkpoint
         if resume_from_checkpoint == "latest":
             resume_from_checkpoint = -1
         if resume_from_checkpoint is not None:
-            self.checkpointer.load(resume_from_checkpoint)
     def _train(self) -> None:
         logger.info("Starting training")

         resume_from_checkpoint = self.args.resume_from_checkpoint
         if resume_from_checkpoint == "latest":
             resume_from_checkpoint = -1
+        # Store the load result
+        load_successful = False
         if resume_from_checkpoint is not None:
+            load_successful = self.checkpointer.load(resume_from_checkpoint)
+        # If loading succeeded and we have a specific checkpoint path
+        if load_successful and isinstance(resume_from_checkpoint, str) and resume_from_checkpoint != "latest":
+            try:
+                step = int(resume_from_checkpoint.split("_")[-1])
+                self.state.train_state.step = step
+                logger.info(f"Explicitly setting training step to {step} based on checkpoint path")
+            except (ValueError, IndexError):
+                logger.warning(f"Could not parse step number from checkpoint path: {resume_from_checkpoint}")
     def _train(self) -> None:
         logger.info("Starting training")

vms/ui/app_ui.py CHANGED Viewed

@@ -146,7 +146,7 @@ class AppUI:
                 # Sidebar for navigation
                 with gr.Sidebar(position="left", open=True):
                     gr.Markdown("# 🎞️ Video Model Studio")
-                    self.components["current_project_btn"] = gr.Button("📂 Current Project", variant="primary")
                     self.components["system_monitoring_btn"] = gr.Button("🌡️ System Monitoring")
                 # Main content area with tabs
@@ -156,7 +156,7 @@ class AppUI:
                         self.main_tabs = main_tabs
                         # Project View Tab
-                        with gr.Tab("📁 Current Project", id=0) as project_view:
                             # Create project tabs
                             with gr.Tabs() as project_tabs:
                                 # Store reference to project tabs component
@@ -551,20 +551,20 @@ class AppUI:
             if is_training:
                 # Active training detected
                 start_btn_props = {"interactive": False, "variant": "secondary", "value": "🚀 Start new training"}
-                resume_btn_props = {"interactive": False, "variant": "secondary", "value": "🛰️ Start from latest checkpoint"}
                 stop_btn_props = {"interactive": True, "variant": "primary", "value": "Stop at Last Checkpoint"}
                 delete_btn_props = {"interactive": False, "variant": "stop", "value": "Delete All Checkpoints"}
             else:
                 # No active training
                 start_btn_props = {"interactive": True, "variant": "primary", "value": "🚀 Start new training"}
-                resume_btn_props = {"interactive": has_checkpoints, "variant": "primary", "value": "🛰️ Start from latest checkpoint"}
                 stop_btn_props = {"interactive": False, "variant": "secondary", "value": "Stop at Last Checkpoint"}
                 delete_btn_props = {"interactive": has_checkpoints, "variant": "stop", "value": "Delete All Checkpoints"}
         else:
             # Use button states from recovery, adding the new resume button
             start_btn_props = ui_updates.get("start_btn", {"interactive": True, "variant": "primary", "value": "🚀 Start new training"})
             resume_btn_props = {"interactive": has_checkpoints and not self.training.is_training_running(),
-                            "variant": "primary", "value": "🛰️ Start from latest checkpoint"}
             stop_btn_props = ui_updates.get("stop_btn", {"interactive": False, "variant": "secondary", "value": "Stop at Last Checkpoint"})
             delete_btn_props = ui_updates.get("delete_checkpoints_btn", {"interactive": has_checkpoints, "variant": "stop", "value": "Delete All Checkpoints"})

                 # Sidebar for navigation
                 with gr.Sidebar(position="left", open=True):
                     gr.Markdown("# 🎞️ Video Model Studio")
+                    self.components["current_project_btn"] = gr.Button("📂 New Project", variant="primary")
                     self.components["system_monitoring_btn"] = gr.Button("🌡️ System Monitoring")
                 # Main content area with tabs
                         self.main_tabs = main_tabs
                         # Project View Tab
+                        with gr.Tab("📁 New Project", id=0) as project_view:
                             # Create project tabs
                             with gr.Tabs() as project_tabs:
                                 # Store reference to project tabs component
             if is_training:
                 # Active training detected
                 start_btn_props = {"interactive": False, "variant": "secondary", "value": "🚀 Start new training"}
+                resume_btn_props = {"interactive": False, "variant": "secondary", "value": "🛸 Start from latest checkpoint"}
                 stop_btn_props = {"interactive": True, "variant": "primary", "value": "Stop at Last Checkpoint"}
                 delete_btn_props = {"interactive": False, "variant": "stop", "value": "Delete All Checkpoints"}
             else:
                 # No active training
                 start_btn_props = {"interactive": True, "variant": "primary", "value": "🚀 Start new training"}
+                resume_btn_props = {"interactive": has_checkpoints, "variant": "primary", "value": "🛸 Start from latest checkpoint"}
                 stop_btn_props = {"interactive": False, "variant": "secondary", "value": "Stop at Last Checkpoint"}
                 delete_btn_props = {"interactive": has_checkpoints, "variant": "stop", "value": "Delete All Checkpoints"}
         else:
             # Use button states from recovery, adding the new resume button
             start_btn_props = ui_updates.get("start_btn", {"interactive": True, "variant": "primary", "value": "🚀 Start new training"})
             resume_btn_props = {"interactive": has_checkpoints and not self.training.is_training_running(),
+                            "variant": "primary", "value": "🛸 Start from latest checkpoint"}
             stop_btn_props = ui_updates.get("stop_btn", {"interactive": False, "variant": "secondary", "value": "Stop at Last Checkpoint"})
             delete_btn_props = ui_updates.get("delete_checkpoints_btn", {"interactive": has_checkpoints, "variant": "stop", "value": "Delete All Checkpoints"})

vms/ui/project/tabs/train_tab.py CHANGED Viewed

@@ -187,8 +187,8 @@ class TrainTab(BaseTab):
                                 # Add description of the training buttons
                                 self.components["training_buttons_info"] = gr.Markdown("""
                                 ## ⚗️ Train your model on your dataset
-                                - **Start new training**: Begins training from scratch (clears previous checkpoints)
-                                - **Start from latest checkpoint**: Continues training from the most recent checkpoint
                                 """)
                                 with gr.Row():
@@ -204,7 +204,7 @@ class TrainTab(BaseTab):
                                     # Add new button for continuing from checkpoint
                                     self.components["resume_btn"] = gr.Button(
-                                        "🛰️ Start from latest checkpoint",
                                         variant="primary",
                                         interactive=has_checkpoints and not ASK_USER_TO_DUPLICATE_SPACE
                                     )
@@ -972,7 +972,7 @@ class TrainTab(BaseTab):
         )
         resume_btn = gr.Button(
-            value="Start from latest checkpoint",
             interactive=has_checkpoints and not is_training,
             variant="primary" if not is_training else "secondary"
         )

                                 # Add description of the training buttons
                                 self.components["training_buttons_info"] = gr.Markdown("""
                                 ## ⚗️ Train your model on your dataset
+                                - **🚀 Start new training**: Begins training from scratch (clears previous checkpoints)
+                                - **🛸 Start from latest checkpoint**: Continues training from the most recent checkpoint
                                 """)
                                 with gr.Row():
                                     # Add new button for continuing from checkpoint
                                     self.components["resume_btn"] = gr.Button(
+                                        "🛸 Start from latest checkpoint",
                                         variant="primary",
                                         interactive=has_checkpoints and not ASK_USER_TO_DUPLICATE_SPACE
                                     )
         )
         resume_btn = gr.Button(
+            value="🛸 Start from latest checkpoint",
             interactive=has_checkpoints and not is_training,
             variant="primary" if not is_training else "secondary"
         )