sophia_opt.SophiaG
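The commit title points at sophia_opt.SophiaG, i.e. a Sophia optimizer class; the hunks below only touch batch-size and evaluation settings, so the optimizer wiring itself is not shown here. For context, a minimal sketch of constructing such an optimizer, assuming the module name from the title and the hyperparameter defaults of the reference Sophia implementation (neither is confirmed by this diff):

    import torch
    from sophia_opt import SophiaG  # module name taken from the commit title (assumed)

    model = torch.nn.Linear(16, 16)   # stand-in model, only to supply parameters
    optimizer = SophiaG(
        model.parameters(),
        lr=1e-4,                      # reference-implementation defaults; the values
        betas=(0.965, 0.99),          # actually used by this config are not visible
        rho=0.04,                     # in the hunks below
        weight_decay=1e-1,
    )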
scripts/pretrain-model.yaml
CHANGED
@@ -65,10 +65,12 @@ train:
   log_interval: 1
 
   # Number of samples between optimizer steps across data-parallel ranks (type: int, default: 512)
-  global_batch_size: 512
+  # global_batch_size: 512
+  global_batch_size: 256
 
   # Number of samples per data-parallel rank (type: int, default: 4)
-  micro_batch_size: 2
+  # micro_batch_size: 2
+  micro_batch_size: 6
 
   # Number of iterations with learning rate warmup active (type: int, default: 2000)
   lr_warmup_steps: 2000
@@ -100,7 +102,8 @@ train:
 # Evaluation-related arguments. See ``litgpt.args.EvalArgs`` for details
 eval:
   # Number of optimizer steps between evaluation calls (type: int, default: 1000)
-  interval: 100
+  # interval: 100
+  interval: 20
 
   # Number of tokens to generate (type: Optional[int], default: null)
   max_new_tokens:
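With the new values, the number of forward/backward passes accumulated per optimizer step follows from global_batch_size, micro_batch_size, and the number of data-parallel ranks. A minimal sketch of that arithmetic, assuming the usual relationship global_batch_size = ranks * micro_batch_size * accumulation_steps and a hypothetical 8-rank run (the rank count is not part of this file):

    # Values from the new side of the diff
    global_batch_size = 256
    micro_batch_size = 6
    num_ranks = 8            # assumption for illustration, not set in this config

    samples_per_pass = num_ranks * micro_batch_size             # 48 samples per forward/backward
    accum_steps, remainder = divmod(global_batch_size, samples_per_pass)
    # A nonzero remainder means this combination does not divide evenly;
    # global_batch_size would need to be a multiple of num_ranks * micro_batch_size.
    print(accum_steps, remainder)                               # -> 5 16

The eval interval change (100 -> 20 optimizer steps) is independent of this arithmetic; it simply makes validation run more often.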