{
  "proj_name": "SmolLM2-135M-Instruct-bag-full-train-half-batch",
  "learning_rate": 8e-06,
  "adam_epsilon": 1e-08,
  "batch_size": 4,
  "max_length": 8192,
  "num_train_epochs": 1,
  "train_data_path": "full-p2l-bag-data-01082025",
  "val_data_path": "p2el/canonical_bt_val_data_11092024",
  "output_dir": "training_outputs",
  "pretrain_model_name": "HuggingFaceTB/SmolLM2-135M-Instruct",
  "gradient_accumulation_steps": 16,
  "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
  "model_type": "llama",
  "head_type": "rk",
  "loss_type": "bag",
  "weighted_loss": false,
  "deepspeed_config_path": "deepspeed/zero1.json",
  "init_type": "reset_params",
  "load_train_data_from_disk": true
}