jbilcke-hf HF Staff committed on
Commit
61a19ec
·
1 Parent(s): 1042322

debugging: pass an explicit state to save_status when saving parsed metrics, and drop the progress-bar update

Browse files
Files changed (1) hide show
  1. vms/services/trainer.py +14 -14
vms/services/trainer.py CHANGED
@@ -1280,20 +1280,20 @@ class TrainingService:
1280
  # Parse metrics only from stdout
1281
  metrics = parse_training_log(line)
1282
  if metrics:
1283
- status = self.get_status()
1284
- status.update(metrics)
1285
- self.save_status(**status)
1286
-
1287
- # Extract total_steps and current_step for progress tracking
1288
- if 'step' in metrics:
1289
- current_step = metrics['step']
1290
- if 'total_steps' in status:
1291
- total_steps = status['total_steps']
1292
-
1293
- # Update progress bar if available and total_steps is known
1294
- if progress_obj and total_steps > 0:
1295
- progress_value = min(0.99, current_step / total_steps)
1296
- progress_obj(progress_value, desc=f"Training: step {current_step}/{total_steps}")
1297
  return True
1298
  return False
1299
 
 
1280
  # Parse metrics only from stdout
1281
  metrics = parse_training_log(line)
1282
  if metrics:
1283
+ # Get current status first
1284
+ current_status = self.get_status()
1285
+
1286
+ # Update with new metrics
1287
+ current_status.update(metrics)
1288
+
1289
+ # Ensure 'state' is present, use current status if available, default to 'training'
1290
+ if 'status' in current_status:
1291
+ # Use 'status' as 'state' to match the required parameter
1292
+ state = current_status.pop('status', 'training')
1293
+ self.save_status(state, **current_status)
1294
+ else:
1295
+ # If no status in the current_status, use 'training' as the default state
1296
+ self.save_status('training', **current_status)
1297
  return True
1298
  return False
1299