Tonic committed on
Commit
829d8f4
·
verified ·
1 Parent(s): e8aadec

fix large training script

Browse files
Files changed (1) hide show
  1. run_a100_large_experiment.py +12 -4
run_a100_large_experiment.py CHANGED
@@ -123,14 +123,22 @@ def main():
123
  try:
124
  from train import main as train_main
125
 
126
- # Set up training arguments
127
  train_args = [
128
- "--config", args.config,
129
- "--output-dir", args.output_dir,
130
  ]
131
 
132
  if args.resume:
133
- train_args.extend(["--resume", args.resume])
 
 
 
 
 
 
 
 
134
 
135
  # Override sys.argv for the training script
136
  original_argv = sys.argv
 
123
  try:
124
  from train import main as train_main
125
 
126
+ # Set up training arguments - config is positional, not --config
127
  train_args = [
128
+ args.config, # Config file as positional argument
129
+ "--out_dir", args.output_dir,
130
  ]
131
 
132
  if args.resume:
133
+ train_args.extend(["--init_from", "resume"])
134
+
135
+ # Add Trackio arguments if provided
136
+ if args.trackio_url:
137
+ train_args.extend(["--trackio_url", args.trackio_url])
138
+ if args.trackio_token:
139
+ train_args.extend(["--trackio_token", args.trackio_token])
140
+ if args.experiment_name:
141
+ train_args.extend(["--experiment_name", args.experiment_name])
142
 
143
  # Override sys.argv for the training script
144
  original_argv = sys.argv