jbilcke-hf HF Staff committed on
Commit
4905a7d
·
1 Parent(s): a73397c
Files changed (3) hide show
  1. app.py +15 -5
  2. training_log_parser.py +1 -0
  3. training_service.py +7 -6
app.py CHANGED
@@ -63,6 +63,8 @@ class VideoTrainerUI:
63
  """Update UI components based on training state"""
64
  updates = {}
65
 
 
 
66
  # Update status box with high-level information
67
  status_text = []
68
  if training_state["status"] != "idle":
@@ -258,10 +260,13 @@ class VideoTrainerUI:
258
 
259
  def update_training_buttons(self, training_state: Dict[str, Any]) -> Dict:
260
  """Update training control buttons based on state"""
 
261
  is_training = training_state["status"] in ["training", "initializing"]
 
 
262
  is_paused = training_state["status"] == "paused"
263
  is_completed = training_state["status"] in ["completed", "error", "stopped"]
264
-
265
  return {
266
  "start_btn": gr.Button(
267
  interactive=not is_training and not is_paused,
@@ -289,8 +294,10 @@ class VideoTrainerUI:
289
  })
290
 
291
  def handle_pause_resume(self):
 
292
  status = self.trainer.get_status()
293
- if status["state"] == "paused":
 
294
  result = self.trainer.resume_training()
295
  new_state = {"status": "training"}
296
  else:
@@ -623,6 +630,8 @@ class VideoTrainerUI:
623
 
624
  status_update = status["message"]
625
 
 
 
626
  # Parse new log lines
627
  if logs:
628
  last_state = None
@@ -630,6 +639,7 @@ class VideoTrainerUI:
630
  state_update = self.log_parser.parse_line(line)
631
  if state_update:
632
  last_state = state_update
 
633
 
634
  if last_state:
635
  ui_updates = self.update_training_ui(last_state)
@@ -648,6 +658,8 @@ class VideoTrainerUI:
648
  "message": status
649
  }
650
 
 
 
651
  if is_completed:
652
  button_updates = self.handle_training_complete()
653
  return (
@@ -1129,9 +1141,7 @@ class VideoTrainerUI:
1129
  ],
1130
  outputs=[status_box, log_box]
1131
  ).success(
1132
- fn=lambda: self.update_training_buttons({
1133
- "status": "training"
1134
- }),
1135
  outputs=[start_btn, stop_btn, pause_resume_btn]
1136
  )
1137
 
 
63
  """Update UI components based on training state"""
64
  updates = {}
65
 
66
+ print("update_training_ui: training_state = ", training_state)
67
+
68
  # Update status box with high-level information
69
  status_text = []
70
  if training_state["status"] != "idle":
 
260
 
261
  def update_training_buttons(self, training_state: Dict[str, Any]) -> Dict:
262
  """Update training control buttons based on state"""
263
+ #print("update_training_buttons: training_state = ", training_state)
264
  is_training = training_state["status"] in ["training", "initializing"]
265
+ if training_state["message"] == "No training in progress":
266
+ is_training = False
267
  is_paused = training_state["status"] == "paused"
268
  is_completed = training_state["status"] in ["completed", "error", "stopped"]
269
+ #print(f"update_training_buttons: is_training = {is_training}, is_paused = {is_paused}, is_completed = {is_completed}")
270
  return {
271
  "start_btn": gr.Button(
272
  interactive=not is_training and not is_paused,
 
294
  })
295
 
296
  def handle_pause_resume(self):
297
+
298
  status = self.trainer.get_status()
299
+ print("handle_pause_resume: status = ", status)
300
+ if status["status"] == "paused":
301
  result = self.trainer.resume_training()
302
  new_state = {"status": "training"}
303
  else:
 
630
 
631
  status_update = status["message"]
632
 
633
+ # print(f"refresh_training_status_and_logs: ", status)
634
+
635
  # Parse new log lines
636
  if logs:
637
  last_state = None
 
639
  state_update = self.log_parser.parse_line(line)
640
  if state_update:
641
  last_state = state_update
642
+ print("last_state = ", last_state)
643
 
644
  if last_state:
645
  ui_updates = self.update_training_ui(last_state)
 
658
  "message": status
659
  }
660
 
661
+ #print("refresh_training_status: current_state = ", current_state)
662
+
663
  if is_completed:
664
  button_updates = self.handle_training_complete()
665
  return (
 
1141
  ],
1142
  outputs=[status_box, log_box]
1143
  ).success(
1144
+ fn=lambda: self.update_training_buttons(),
 
 
1145
  outputs=[start_btn, stop_btn, pause_resume_btn]
1146
  )
1147
 
training_log_parser.py CHANGED
@@ -73,6 +73,7 @@ class TrainingLogParser:
73
  if "Training steps:" in line:
74
  # Set status to training if we see this
75
  self.state.status = "training"
 
76
  if not self.state.start_time:
77
  self.state.start_time = datetime.now()
78
 
 
73
  if "Training steps:" in line:
74
  # Set status to training if we see this
75
  self.state.status = "training"
76
+ print("setting status to 'training'")
77
  if not self.state.start_time:
78
  self.state.start_time = datetime.now()
79
 
training_service.py CHANGED
@@ -29,7 +29,7 @@ logging.basicConfig(
29
  format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
30
  handlers=[
31
  logging.StreamHandler(sys.stdout),
32
- logging.FileHandler('training_service.log')
33
  ]
34
  )
35
  logger = logging.getLogger(__name__)
@@ -65,7 +65,7 @@ class TrainingService:
65
 
66
  def get_status(self) -> Dict:
67
  """Get current training status"""
68
- default_status = {'state': 'stopped', 'message': 'No training in progress'}
69
 
70
  if not self.status_file.exists():
71
  return default_status
@@ -73,6 +73,7 @@ class TrainingService:
73
  try:
74
  with open(self.status_file, 'r') as f:
75
  status = json.load(f)
 
76
 
77
  # Check if process is actually running
78
  if self.pid_file.exists():
@@ -80,12 +81,12 @@ class TrainingService:
80
  pid = int(f.read().strip())
81
  if not psutil.pid_exists(pid):
82
  # Process died unexpectedly
83
- if status['state'] == 'running':
84
- status['state'] = 'error'
85
  status['message'] = 'Training process terminated unexpectedly'
86
  self.append_log("Training process terminated unexpectedly")
87
  else:
88
- status['state'] = 'stopped'
89
  status['message'] = 'Training process not found'
90
  return status
91
 
@@ -432,7 +433,7 @@ class TrainingService:
432
  def save_status(self, state: str, **kwargs) -> None:
433
  """Save current training status"""
434
  status = {
435
- 'state': state,
436
  'timestamp': datetime.now().isoformat(),
437
  **kwargs
438
  }
 
29
  format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
30
  handlers=[
31
  logging.StreamHandler(sys.stdout),
32
+ logging.FileHandler(str(OUTPUT_PATH / 'training_service.log'))
33
  ]
34
  )
35
  logger = logging.getLogger(__name__)
 
65
 
66
  def get_status(self) -> Dict:
67
  """Get current training status"""
68
+ default_status = {'status': 'stopped', 'message': 'No training in progress'}
69
 
70
  if not self.status_file.exists():
71
  return default_status
 
73
  try:
74
  with open(self.status_file, 'r') as f:
75
  status = json.load(f)
76
+ print("status found in the json:", status)
77
 
78
  # Check if process is actually running
79
  if self.pid_file.exists():
 
81
  pid = int(f.read().strip())
82
  if not psutil.pid_exists(pid):
83
  # Process died unexpectedly
84
+ if status['status'] == 'running':
85
+ status['status'] = 'error'
86
  status['message'] = 'Training process terminated unexpectedly'
87
  self.append_log("Training process terminated unexpectedly")
88
  else:
89
+ status['status'] = 'stopped'
90
  status['message'] = 'Training process not found'
91
  return status
92
 
 
433
  def save_status(self, state: str, **kwargs) -> None:
434
  """Save current training status"""
435
  status = {
436
+ 'status': state,
437
  'timestamp': datetime.now().isoformat(),
438
  **kwargs
439
  }