mtasic85 committed
Commit: 3981027
Parent(s): 0ef92af

sophia_opt.SophiaG
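
Note: the commit title points at the SophiaG optimizer from the sophia_opt module. As a minimal, illustrative sketch only — the module and class names come from the commit message, while the constructor arguments below are the reference Sophia implementation's defaults, not values taken from this repo's config — constructing it for a PyTorch model might look like:

import torch
from sophia_opt import SophiaG  # module/class names as given in the commit message

model = torch.nn.Linear(8, 8)  # stand-in for the actual pretraining model

# Hypothetical hyperparameters: the reference SophiaG defaults, not values from this repo
optimizer = SophiaG(
    model.parameters(),
    lr=1e-4,
    betas=(0.965, 0.99),
    rho=0.04,
    weight_decay=1e-1,
)
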

Files changed (1):
  1. scripts/pretrain-model.yaml (+6, -3)
scripts/pretrain-model.yaml CHANGED
@@ -65,10 +65,12 @@ train:
  log_interval: 1

  # Number of samples between optimizer steps across data-parallel ranks (type: int, default: 512)
- global_batch_size: 512
+ # global_batch_size: 512
+ global_batch_size: 256

  # Number of samples per data-parallel rank (type: int, default: 4)
- micro_batch_size: 2
+ # micro_batch_size: 2
+ micro_batch_size: 6

  # Number of iterations with learning rate warmup active (type: int, default: 2000)
  lr_warmup_steps: 2000
@@ -100,7 +102,8 @@ train:
  # Evaluation-related arguments. See ``litgpt.args.EvalArgs`` for details
  eval:
  # Number of optimizer steps between evaluation calls (type: int, default: 1000)
- interval: 100
+ # interval: 100
+ interval: 20

  # Number of tokens to generate (type: Optional[int], default: null)
  max_new_tokens:
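
For context on the values changed above: global_batch_size and micro_batch_size interact through gradient accumulation, with each rank accumulating roughly global_batch_size / (micro_batch_size × devices) micro-batches per optimizer step. The sketch below only illustrates that arithmetic for the new values; the device count is a hypothetical example, and the exact rounding/validation behaviour belongs to litgpt, not this sketch.

# Rough sketch of how the two batch settings relate (assumed relationship,
# not litgpt's actual implementation).
global_batch_size = 256  # new value: samples per optimizer step across all data-parallel ranks
micro_batch_size = 6     # new value: samples per forward/backward pass on each rank
devices = 4              # hypothetical number of data-parallel ranks

per_rank_batch = global_batch_size // devices                # 64 samples per rank per optimizer step
accum_iters = per_rank_batch // micro_batch_size             # 10 accumulated micro-batches per rank
effective_global = accum_iters * micro_batch_size * devices  # 240 samples actually contributing per step

print(per_rank_batch, accum_iters, effective_global)
# 256 is not an exact multiple of 6 * devices in this example, so the effective
# batch per step (240) comes out slightly below the configured global_batch_size.

Separately, lowering eval.interval from 100 to 20 simply makes evaluation run every 20 optimizer steps instead of every 100, i.e. five times as often.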