Upload configs.yaml with huggingface_hub
configs.yaml  +2 -2
@@ -9,7 +9,7 @@ eval_strategy: 'no'
 finetuning_type: full
 formatting: sharegpt
 global_batch_size: 96
-gradient_accumulation_steps:
+gradient_accumulation_steps: 3
 hub_model_id: mlfoundations-dev/llama3-1_8b_4o_annotated_aops
 include_hp: dcft/train/hp_settings/reasoning.yaml
 learning_rate: 1.0e-05
@@ -21,7 +21,7 @@ model_name_or_path: Qwen/Qwen2.5-7B-Instruct
 num_train_epochs: 3.0
 output_dir: /tmp/dcft_checkpoints/llama3-1_8b_4o_annotated_aops
 overwrite_cache: true
-per_device_train_batch_size:
+per_device_train_batch_size: 1
 plot_loss: true
 preprocessing_num_workers: 16
 push_to_db: true
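Note (not part of the commit): in trainer configs of this style, the effective global batch size is usually per_device_train_batch_size x gradient_accumulation_steps x number of processes, so the values filled in here are consistent with global_batch_size: 96 on a 32-process run. A minimal sanity-check sketch; the world size of 32 is an assumption inferred from the arithmetic, not something stated in the file:

# Sanity check for the values set in this commit.
# world_size = 32 is an ASSUMPTION derived from 96 / (1 * 3);
# the actual GPU count is not recorded in configs.yaml.
per_device_train_batch_size = 1
gradient_accumulation_steps = 3
world_size = 32  # assumed number of GPUs/processes

effective_batch = per_device_train_batch_size * gradient_accumulation_steps * world_size
assert effective_batch == 96  # matches global_batch_size: 96 in configs.yaml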
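The commit message matches the default produced by huggingface_hub's upload helpers. A sketch of how a file like this is typically pushed; the repo_id is taken from hub_model_id in the config for illustration and may differ from the repo this commit actually landed in:

# Illustrative upload via huggingface_hub's HfApi.upload_file.
# repo_id is copied from hub_model_id above as an example target.
from huggingface_hub import HfApi

api = HfApi()  # picks up the token from `huggingface-cli login` by default
api.upload_file(
    path_or_fileobj="configs.yaml",
    path_in_repo="configs.yaml",
    repo_id="mlfoundations-dev/llama3-1_8b_4o_annotated_aops",
    commit_message="Upload configs.yaml with huggingface_hub",
)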