Spaces:
Running
Running
adds parameters to medical config
Browse files
config/train_gpt_oss_medical_o1_sft.py
CHANGED
@@ -56,12 +56,13 @@ config = GPTOSSEnhancedCustomConfig(
|
|
56 |
# TRAINING HYPERPARAMETERS
|
57 |
# ============================================================================
|
58 |
num_train_epochs=1.0,
|
59 |
-
batch_size=
|
60 |
-
gradient_accumulation_steps=
|
61 |
learning_rate=2e-4,
|
62 |
min_lr=2e-5,
|
63 |
weight_decay=0.01,
|
64 |
warmup_ratio=0.03,
|
|
|
65 |
max_grad_norm=1.0,
|
66 |
|
67 |
# Sequence length
|
@@ -113,7 +114,7 @@ config = GPTOSSEnhancedCustomConfig(
|
|
113 |
# LOGGING & EVAL
|
114 |
# ============================================================================
|
115 |
eval_strategy="steps",
|
116 |
-
eval_steps=
|
117 |
logging_steps=10,
|
118 |
save_strategy="steps",
|
119 |
save_steps=500,
|
@@ -124,8 +125,8 @@ config = GPTOSSEnhancedCustomConfig(
|
|
124 |
load_best_model_at_end=False,
|
125 |
eval_accumulation_steps=2,
|
126 |
eval_batch_size=1,
|
127 |
-
eval_ratio=0.
|
128 |
-
test_ratio=0.
|
129 |
|
130 |
# ============================================================================
|
131 |
# MONITORING & HUB
|
|
|
56 |
# TRAINING HYPERPARAMETERS
|
57 |
# ============================================================================
|
58 |
num_train_epochs=1.0,
|
59 |
+
batch_size=4,
|
60 |
+
gradient_accumulation_steps=4,
|
61 |
learning_rate=2e-4,
|
62 |
min_lr=2e-5,
|
63 |
weight_decay=0.01,
|
64 |
warmup_ratio=0.03,
|
65 |
+
warmup_steps=50,
|
66 |
max_grad_norm=1.0,
|
67 |
|
68 |
# Sequence length
|
|
|
114 |
# LOGGING & EVAL
|
115 |
# ============================================================================
|
116 |
eval_strategy="steps",
|
117 |
+
eval_steps=100,
|
118 |
logging_steps=10,
|
119 |
save_strategy="steps",
|
120 |
save_steps=500,
|
|
|
125 |
load_best_model_at_end=False,
|
126 |
eval_accumulation_steps=2,
|
127 |
eval_batch_size=1,
|
128 |
+
eval_ratio=0.001,
|
129 |
+
test_ratio=0.0005,
|
130 |
|
131 |
# ============================================================================
|
132 |
# MONITORING & HUB
|