Tonic committed
Commit e092247 · verified · 1 Parent(s): 0c97eb6

Fixes the adamw → adamw_torch optimizer naming issue

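A likely reason for the rename: if the training script forwards this config field to Hugging Face's `TrainingArguments` (an assumption about the wiring, which this commit does not show), then `"adamw_torch"` is an accepted `optim` value in recent transformers versions, while a bare `"adamw"` is not and fails validation. A minimal sketch:

```python
# Minimal sketch, assuming the config's `optimizer` string is passed to
# transformers' TrainingArguments. The parameter names below are real
# TrainingArguments arguments; the wiring itself is an assumption.
from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="out",
    optim="adamw_torch",  # accepted; a bare "adamw" raises a ValueError
    adam_beta1=0.9,
    adam_beta2=0.95,
    adam_epsilon=1e-8,
)
print(args.optim)  # OptimizerNames.ADAMW_TORCH
```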
config/train_smollm3.py CHANGED
@@ -29,7 +29,7 @@ class SmolLM3Config:
  save_interval: int = 500
 
  # Optimizer configuration
- optimizer: str = "adamw"
+ optimizer: str = "adamw_torch"
  beta1: float = 0.9
  beta2: float = 0.95
  eps: float = 1e-8
config/train_smollm3_openhermes_fr.py CHANGED
@@ -30,7 +30,7 @@ class SmolLM3ConfigOpenHermesFR(SmolLM3Config):
  save_interval: int = 500
 
  # Optimizer configuration
- optimizer: str = "adamw"
+ optimizer: str = "adamw_torch"
  beta1: float = 0.9
  beta2: float = 0.95
  eps: float = 1e-8
config/train_smollm3_openhermes_fr_a100_large.py CHANGED
@@ -30,7 +30,7 @@ class SmolLM3ConfigOpenHermesFRA100Large(SmolLM3Config):
  save_interval: int = 1000 # Less frequent saving
 
  # Optimizer configuration - optimized for large batches
- optimizer: str = "adamw"
+ optimizer: str = "adamw_torch"
  beta1: float = 0.9
  beta2: float = 0.999 # Higher beta2 for stability with large batches
  eps: float = 1e-8
config/train_smollm3_openhermes_fr_a100_multiple_passes.py CHANGED
@@ -30,7 +30,7 @@ class SmolLM3ConfigOpenHermesFRMultiplePasses(SmolLM3Config):
  save_interval: int = 2000 # Less frequent saving
 
  # Optimizer configuration - stability focused
- optimizer: str = "adamw"
+ optimizer: str = "adamw_torch"
  beta1: float = 0.9
  beta2: float = 0.999 # Higher beta2 for stability
  eps: float = 1e-8
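To see which names a given transformers install actually accepts, the `OptimizerNames` enum can be listed directly; a quick check, assuming transformers is installed:

```python
# Quick check of the accepted optimizer names in the installed
# transformers version; "adamw_torch" should appear, a bare "adamw" should not.
from transformers.training_args import OptimizerNames

print(sorted(o.value for o in OptimizerNames))
```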