Spaces:
Running
Running
Fix optimizer issue: set the optimizer name to adamw_torch
Browse files
config/train_smollm3.py
CHANGED
@@ -29,7 +29,7 @@ class SmolLM3Config:
|
|
29 |
save_interval: int = 500
|
30 |
|
31 |
# Optimizer configuration
|
32 |
-
optimizer: str = "
|
33 |
beta1: float = 0.9
|
34 |
beta2: float = 0.95
|
35 |
eps: float = 1e-8
|
|
|
29 |
save_interval: int = 500
|
30 |
|
31 |
# Optimizer configuration
|
32 |
+
optimizer: str = "adamw_torch"
|
33 |
beta1: float = 0.9
|
34 |
beta2: float = 0.95
|
35 |
eps: float = 1e-8
|
config/train_smollm3_openhermes_fr.py
CHANGED
@@ -30,7 +30,7 @@ class SmolLM3ConfigOpenHermesFR(SmolLM3Config):
|
|
30 |
save_interval: int = 500
|
31 |
|
32 |
# Optimizer configuration
|
33 |
-
optimizer: str = "
|
34 |
beta1: float = 0.9
|
35 |
beta2: float = 0.95
|
36 |
eps: float = 1e-8
|
|
|
30 |
save_interval: int = 500
|
31 |
|
32 |
# Optimizer configuration
|
33 |
+
optimizer: str = "adamw_torch"
|
34 |
beta1: float = 0.9
|
35 |
beta2: float = 0.95
|
36 |
eps: float = 1e-8
|
config/train_smollm3_openhermes_fr_a100_large.py
CHANGED
@@ -30,7 +30,7 @@ class SmolLM3ConfigOpenHermesFRA100Large(SmolLM3Config):
|
|
30 |
save_interval: int = 1000 # Less frequent saving
|
31 |
|
32 |
# Optimizer configuration - optimized for large batches
|
33 |
-
optimizer: str = "
|
34 |
beta1: float = 0.9
|
35 |
beta2: float = 0.999 # Higher beta2 for stability with large batches
|
36 |
eps: float = 1e-8
|
|
|
30 |
save_interval: int = 1000 # Less frequent saving
|
31 |
|
32 |
# Optimizer configuration - optimized for large batches
|
33 |
+
optimizer: str = "adamw_torch"
|
34 |
beta1: float = 0.9
|
35 |
beta2: float = 0.999 # Higher beta2 for stability with large batches
|
36 |
eps: float = 1e-8
|
config/train_smollm3_openhermes_fr_a100_multiple_passes.py
CHANGED
@@ -30,7 +30,7 @@ class SmolLM3ConfigOpenHermesFRMultiplePasses(SmolLM3Config):
|
|
30 |
save_interval: int = 2000 # Less frequent saving
|
31 |
|
32 |
# Optimizer configuration - stability focused
|
33 |
-
optimizer: str = "
|
34 |
beta1: float = 0.9
|
35 |
beta2: float = 0.999 # Higher beta2 for stability
|
36 |
eps: float = 1e-8
|
|
|
30 |
save_interval: int = 2000 # Less frequent saving
|
31 |
|
32 |
# Optimizer configuration - stability focused
|
33 |
+
optimizer: str = "adamw_torch"
|
34 |
beta1: float = 0.9
|
35 |
beta2: float = 0.999 # Higher beta2 for stability
|
36 |
eps: float = 1e-8
|