jbilcke-hf HF Staff committed on
Commit
f1c60d3
·
1 Parent(s): b7c0de1
vms/ui/app_ui.py CHANGED
@@ -234,7 +234,7 @@ class AppUI:
234
  self.project_tabs["train_tab"].components["num_gpus"],
235
  self.project_tabs["train_tab"].components["precomputation_items"],
236
  self.project_tabs["train_tab"].components["lr_warmup_steps"],
237
- self.project_tabs["train_tab"].components["auto_resume_checkbox"]
238
  ]
239
  )
240
 
 
234
  self.project_tabs["train_tab"].components["num_gpus"],
235
  self.project_tabs["train_tab"].components["precomputation_items"],
236
  self.project_tabs["train_tab"].components["lr_warmup_steps"],
237
+ self.project_tabs["train_tab"].components["auto_resume"]
238
  ]
239
  )
240
 
vms/ui/project/services/training.py CHANGED
@@ -1078,7 +1078,8 @@ class TrainingService:
1078
  "learning_rate": ui_state.get("learning_rate", DEFAULT_LEARNING_RATE),
1079
  "save_iterations": ui_state.get("save_iterations", DEFAULT_SAVE_CHECKPOINT_EVERY_N_STEPS),
1080
  "preset_name": ui_state.get("training_preset", list(TRAINING_PRESETS.keys())[0]),
1081
- "repo_id": "" # Default empty repo ID
 
1082
  }
1083
  }
1084
  logger.info("Created default session from UI state for recovery")
@@ -1150,7 +1151,7 @@ class TrainingService:
1150
  "learning_rate": params.get('learning_rate', DEFAULT_LEARNING_RATE),
1151
  "save_iterations": params.get('save_iterations', DEFAULT_SAVE_CHECKPOINT_EVERY_N_STEPS),
1152
  "training_preset": params.get('preset_name', list(TRAINING_PRESETS.keys())[0]),
1153
- "auto_resume_checkbox": params.get("auto_resume", DEFAULT_AUTO_RESUME)
1154
  })
1155
 
1156
  # Check if we should auto-recover (immediate restart)
 
1078
  "learning_rate": ui_state.get("learning_rate", DEFAULT_LEARNING_RATE),
1079
  "save_iterations": ui_state.get("save_iterations", DEFAULT_SAVE_CHECKPOINT_EVERY_N_STEPS),
1080
  "preset_name": ui_state.get("training_preset", list(TRAINING_PRESETS.keys())[0]),
1081
+ "repo_id": "", # Default empty repo ID
1082
+ "auto_resume": ui_state.get("auto_resume", DEFAULT_AUTO_RESUME)
1083
  }
1084
  }
1085
  logger.info("Created default session from UI state for recovery")
 
1151
  "learning_rate": params.get('learning_rate', DEFAULT_LEARNING_RATE),
1152
  "save_iterations": params.get('save_iterations', DEFAULT_SAVE_CHECKPOINT_EVERY_N_STEPS),
1153
  "training_preset": params.get('preset_name', list(TRAINING_PRESETS.keys())[0]),
1154
+ "auto_resume": params.get("auto_resume", DEFAULT_AUTO_RESUME)
1155
  })
1156
 
1157
  # Check if we should auto-recover (immediate restart)
vms/ui/project/tabs/train_tab.py CHANGED
@@ -233,7 +233,7 @@ class TrainTab(BaseTab):
233
  )
234
 
235
  with gr.Row():
236
- self.components["auto_resume_checkbox"] = gr.Checkbox(
237
  label="Automatically continue training in case of server reboot.",
238
  value=DEFAULT_AUTO_RESUME,
239
  info="When enabled, training will automatically resume from the latest checkpoint after app restart"
@@ -389,9 +389,9 @@ class TrainTab(BaseTab):
389
  ]
390
  )
391
 
392
- self.components["auto_resume_checkbox"].change(
393
  fn=lambda v: self.app.update_ui_state(auto_resume=v),
394
- inputs=[self.components["auto_resume_checkbox"]],
395
  outputs=[]
396
  )
397
 
 
233
  )
234
 
235
  with gr.Row():
236
+ self.components["auto_resume"] = gr.Checkbox(
237
  label="Automatically continue training in case of server reboot.",
238
  value=DEFAULT_AUTO_RESUME,
239
  info="When enabled, training will automatically resume from the latest checkpoint after app restart"
 
389
  ]
390
  )
391
 
392
+ self.components["auto_resume"].change(
393
  fn=lambda v: self.app.update_ui_state(auto_resume=v),
394
+ inputs=[self.components["auto_resume"]],
395
  outputs=[]
396
  )
397