tangledgroup
/

tangled-llama-i-128k-v0.1

Text Generation

Inference Endpoints

Model card | Files and versions | Community

mtasic85 committed 2 days ago

Commit

a1af5c6

•

1 Parent(s): 11df753

sophia_opt.SophiaG

Files changed (1) hide show

scripts/pretrain-model.yaml +2 -4

scripts/pretrain-model.yaml CHANGED Viewed

@@ -65,12 +65,10 @@ train:
   log_interval: 1
   # Number of samples between optimizer steps across data-parallel ranks (type: int, default: 512)
-  # global_batch_size: 512
-  global_batch_size: 256
   # Number of samples per data-parallel rank (type: int, default: 4)
-  # micro_batch_size: 2
-  micro_batch_size: 4
   # Number of iterations with learning rate warmup active (type: int, default: 2000)
   lr_warmup_steps: 2000

   log_interval: 1
   # Number of samples between optimizer steps across data-parallel ranks (type: int, default: 512)
+  global_batch_size: 512
   # Number of samples per data-parallel rank (type: int, default: 4)
+  micro_batch_size: 2
   # Number of iterations with learning rate warmup active (type: int, default: 2000)
   lr_warmup_steps: 2000