tangledgroup
/

tangled-llama-i-128k-v0.1

Text Generation

Inference Endpoints

Model card Files Files and versions Community

mtasic85 committed 2 days ago

Commit

cfb3029

•

1 Parent(s): e66e4fa

sophia_opt.SophiaG

Files changed (2) hide show

scripts/pretrain-model.yaml +22 -11
scripts/requirements.in +2 -0

scripts/pretrain-model.yaml CHANGED Viewed

@@ -115,21 +115,32 @@ eval:
   final_validation: true
 # Optimizer-related arguments
 optimizer:
-  # class_path: torch.optim.AdamW
-  class_path: grokadamw.GrokAdamW
   init_args:
-    #   (type: float, default: 0.001)
-    lr: 4e-04
-    #   (type: float, default: 0.01)
-    weight_decay: 0.1
-    #   (type: tuple, default: (0.9,0.999))
     betas:
-      - 0.9
-      - 0.95
 # How many devices/GPUs to use. Uses all GPUs by default. (type: Union[int, str], default: auto)
 devices: auto

   final_validation: true
 # Optimizer-related arguments
+# optimizer:
+#   # class_path: torch.optim.AdamW
+#   class_path: grokadamw.GrokAdamW
+#
+#   init_args:
+#     #   (type: float, default: 0.001)
+#     lr: 4e-04
+#
+#     #   (type: float, default: 0.01)
+#     weight_decay: 0.1
+#
+#     #   (type: tuple, default: (0.9,0.999))
+#     betas:
+#       - 0.9
+#       - 0.95
 optimizer:
+  class_path: sophia_opt.SophiaG
   init_args:
+    lr: 2e-4
     betas:
+      - 0.965
+      - 0.99
+    rho: 0.01
+    weight_decay: 1e-1
 # How many devices/GPUs to use. Uses all GPUs by default. (type: Union[int, str], default: auto)
 devices: auto

scripts/requirements.in CHANGED Viewed

@@ -17,3 +17,5 @@ grokadamw
 # pyzstd
 # zstd
 Pillow

 # pyzstd
 # zstd
 Pillow
+sophia-opt