Spaces:

jbilcke-hf
/

VideoModelStudio

Running

App Files Files Community

jbilcke-hf HF Staff committed on Feb 28

Commit

446e79f

1 Parent(s): 54a2a4e

working on fixes

Browse files

Files changed (2) hide show

app.py +72 -37
vms/training_service.py +10 -5

app.py CHANGED Viewed

@@ -77,12 +77,68 @@ class VideoTrainerUI:
             # UI will be in ready-to-start mode
     def update_ui_state(self, **kwargs):
         """Update UI state with new values"""
         current_state = self.trainer.load_ui_state()
         current_state.update(kwargs)
         self.trainer.save_ui_state(current_state)
-        return current_state
     def load_ui_values(self):
         """Load UI state values for initializing form fields"""
@@ -130,6 +186,19 @@ class VideoTrainerUI:
             )
         )
     def show_refreshing_status(self) -> List[List[str]]:
         """Show a 'Refreshing...' status in the dataframe"""
         return [["Refreshing...", "please wait"]]
@@ -1421,52 +1490,18 @@ class VideoTrainerUI:
                 ]
             )
-            # Add this new method to get initial button states:
-            def get_initial_button_states(self):
-                """Get the initial states for training buttons based on recovery status"""
-                recovery_result = self.trainer.recover_interrupted_training()
-                ui_updates = recovery_result.get("ui_updates", {})
-                # Return button states in the correct order
-                return (
-                    gr.Button(**ui_updates.get("start_btn", {"interactive": True, "variant": "primary"})),
-                    gr.Button(**ui_updates.get("stop_btn", {"interactive": False, "variant": "secondary"})),
-                    gr.Button(**ui_updates.get("pause_resume_btn", {"interactive": False, "variant": "secondary"}))
-                )
-            def initialize_ui_from_state(self):
-                """Initialize UI components from saved state"""
-                ui_state = self.load_ui_values()
-                # Return values in order matching the outputs in app.load
-                return (
-                    ui_state.get("training_preset", list(TRAINING_PRESETS.keys())[0]),
-                    ui_state.get("model_type", list(MODEL_TYPES.keys())[0]),
-                    ui_state.get("lora_rank", "128"),
-                    ui_state.get("lora_alpha", "128"),
-                    ui_state.get("num_epochs", 70),
-                    ui_state.get("batch_size", 1),
-                    ui_state.get("learning_rate", 3e-5),
-                    ui_state.get("save_iterations", 500)
-                )
-            # Auto-refresh timers
             app.load(
-                fn=lambda: (
-                    self.refresh_dataset(),
-                    *self.get_initial_button_states(),
-                    # Load saved UI state values
-                    *self.initialize_ui_from_state()
-                ),
                 outputs=[
                     video_list, training_dataset,
                     start_btn, stop_btn, pause_resume_btn,
-                    # Add outputs for UI fields
                     training_preset, model_type, lora_rank, lora_alpha,
                     num_epochs, batch_size, learning_rate, save_iterations
                 ]
             )
             timer = gr.Timer(value=1)
             timer.tick(
                 fn=lambda: (

             # UI will be in ready-to-start mode
+    def initialize_app_state(self):
+        """Initialize all app state in one function to ensure correct output count"""
+        # Get dataset info
+        video_list, training_dataset = self.refresh_dataset()
+        # Get button states
+        button_states = self.get_initial_button_states()
+        start_btn = button_states[0]
+        stop_btn = button_states[1]
+        pause_resume_btn = button_states[2]
+        # Get UI form values
+        ui_state = self.load_ui_values()
+        training_preset = ui_state.get("training_preset", list(TRAINING_PRESETS.keys())[0])
+        model_type_val = ui_state.get("model_type", list(MODEL_TYPES.keys())[0])
+        lora_rank_val = ui_state.get("lora_rank", "128")
+        lora_alpha_val = ui_state.get("lora_alpha", "128")
+        num_epochs_val = int(ui_state.get("num_epochs", 70))
+        batch_size_val = int(ui_state.get("batch_size", 1))
+        learning_rate_val = float(ui_state.get("learning_rate", 3e-5))
+        save_iterations_val = int(ui_state.get("save_iterations", 500))
+        # Return all values in the exact order expected by outputs
+        return (
+            video_list,
+            training_dataset,
+            start_btn,
+            stop_btn,
+            pause_resume_btn,
+            training_preset,
+            model_type_val,
+            lora_rank_val,
+            lora_alpha_val,
+            num_epochs_val,
+            batch_size_val,
+            learning_rate_val,
+            save_iterations_val
+        )
+    def initialize_ui_from_state(self):
+        """Initialize UI components from saved state"""
+        ui_state = self.load_ui_values()
+        # Return values in order matching the outputs in app.load
+        return (
+            ui_state.get("training_preset", list(TRAINING_PRESETS.keys())[0]),
+            ui_state.get("model_type", list(MODEL_TYPES.keys())[0]),
+            ui_state.get("lora_rank", "128"),
+            ui_state.get("lora_alpha", "128"),
+            ui_state.get("num_epochs", 70),
+            ui_state.get("batch_size", 1),
+            ui_state.get("learning_rate", 3e-5),
+            ui_state.get("save_iterations", 500)
+        )
     def update_ui_state(self, **kwargs):
         """Update UI state with new values"""
         current_state = self.trainer.load_ui_state()
         current_state.update(kwargs)
         self.trainer.save_ui_state(current_state)
+        # Don't return anything to avoid Gradio warnings
+        return None
     def load_ui_values(self):
         """Load UI state values for initializing form fields"""
             )
         )
+    # Add this new method to get initial button states:
+    def get_initial_button_states(self):
+        """Get the initial states for training buttons based on recovery status"""
+        recovery_result = self.trainer.recover_interrupted_training()
+        ui_updates = recovery_result.get("ui_updates", {})
+        # Return button states in the correct order
+        return (
+            gr.Button(**ui_updates.get("start_btn", {"interactive": True, "variant": "primary"})),
+            gr.Button(**ui_updates.get("stop_btn", {"interactive": False, "variant": "secondary"})),
+            gr.Button(**ui_updates.get("pause_resume_btn", {"interactive": False, "variant": "secondary"}))
+        )
     def show_refreshing_status(self) -> List[List[str]]:
         """Show a 'Refreshing...' status in the dataframe"""
         return [["Refreshing...", "please wait"]]
                 ]
             )
             app.load(
+                fn=self.initialize_app_state,
                 outputs=[
                     video_list, training_dataset,
                     start_btn, stop_btn, pause_resume_btn,
                     training_preset, model_type, lora_rank, lora_alpha,
                     num_epochs, batch_size, learning_rate, save_iterations
                 ]
             )
+            # Auto-refresh timers
             timer = gr.Timer(value=1)
             timer.tick(
                 fn=lambda: (

vms/training_service.py CHANGED Viewed

@@ -164,12 +164,11 @@ class TrainingService:
         if not self.status_file.exists():
             return default_status
         try:
             with open(self.status_file, 'r') as f:
                 status = json.load(f)
-                #print("status found in the json:", status)
             # Check if process is actually running
             if self.pid_file.exists():
                 with open(self.pid_file, 'r') as f:
@@ -177,14 +176,20 @@ class TrainingService:
                 if not psutil.pid_exists(pid):
                     # Process died unexpectedly
                     if status['status'] == 'training':
                         status['status'] = 'error'
                         status['message'] = 'Training process terminated unexpectedly'
-                        self.append_log("Training process terminated unexpectedly")
                     else:
                         status['status'] = 'stopped'
                         status['message'] = 'Training process not found'
             return status
         except (json.JSONDecodeError, ValueError):
             return default_status

         if not self.status_file.exists():
             return default_status
         try:
             with open(self.status_file, 'r') as f:
                 status = json.load(f)
             # Check if process is actually running
             if self.pid_file.exists():
                 with open(self.pid_file, 'r') as f:
                 if not psutil.pid_exists(pid):
                     # Process died unexpectedly
                     if status['status'] == 'training':
+                        # Only log this once by checking if we've already updated the status
+                        if not hasattr(self, '_process_terminated_logged') or not self._process_terminated_logged:
+                            self.append_log("Training process terminated unexpectedly")
+                            self._process_terminated_logged = True
                         status['status'] = 'error'
                         status['message'] = 'Training process terminated unexpectedly'
+                        # Update the status file to avoid repeated logging
+                        with open(self.status_file, 'w') as f:
+                            json.dump(status, f, indent=2)
                     else:
                         status['status'] = 'stopped'
                         status['message'] = 'Training process not found'
             return status
         except (json.JSONDecodeError, ValueError):
             return default_status