jbilcke-hf HF Staff committed on
Commit
0567ba0
·
1 Parent(s): 9000726
vms/ui/project/services/training.py CHANGED
@@ -1146,15 +1146,12 @@ class TrainingService:
1146
  auto_recover = True # Always auto-recover on startup
1147
 
1148
  if auto_recover:
1149
- # Rest of the auto-recovery code remains unchanged
1150
  try:
1151
- # Use the internal model_type for the actual training
1152
- # But keep model_type_display for the UI
1153
  result = self.start_training(
1154
  model_type=model_type_internal,
1155
  lora_rank=params.get('lora_rank', DEFAULT_LORA_RANK_STR),
1156
  lora_alpha=params.get('lora_alpha', DEFAULT_LORA_ALPHA_STR),
1157
- train_size=params.get('train_steps', DEFAULT_NB_TRAINING_STEPS),
1158
  batch_size=params.get('batch_size', DEFAULT_BATCH_SIZE),
1159
  learning_rate=params.get('learning_rate', DEFAULT_LEARNING_RATE),
1160
  save_iterations=params.get('save_iterations', DEFAULT_SAVE_CHECKPOINT_EVERY_N_STEPS),
 
1146
  auto_recover = True # Always auto-recover on startup
1147
 
1148
  if auto_recover:
 
1149
  try:
 
 
1150
  result = self.start_training(
1151
  model_type=model_type_internal,
1152
  lora_rank=params.get('lora_rank', DEFAULT_LORA_RANK_STR),
1153
  lora_alpha=params.get('lora_alpha', DEFAULT_LORA_ALPHA_STR),
1154
+ train_steps=params.get('train_steps', DEFAULT_NB_TRAINING_STEPS),
1155
  batch_size=params.get('batch_size', DEFAULT_BATCH_SIZE),
1156
  learning_rate=params.get('learning_rate', DEFAULT_LEARNING_RATE),
1157
  save_iterations=params.get('save_iterations', DEFAULT_SAVE_CHECKPOINT_EVERY_N_STEPS),