{
"proj_name": "SmolLM2-135M-Instruct-full-train",
"learning_rate": 4e-06,
"adam_epsilon": 1e-08,
"batch_size": 4,
"max_length": 8192,
"num_train_epochs": 1,
"train_data_path": "full-p2l-data-01082025",
"val_data_path": "p2el/canonical_bt_val_data_11092024",
"output_dir": "training_outputs",
"pretrain_model_name": "HuggingFaceTB/SmolLM2-135M-Instruct",
"gradient_accumulation_steps": 16,
"chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
"model_type": "llama",
"head_type": "bt",
"loss_type": "bt_tie",
"weighted_loss": false,
"deepspeed_config_path": "deepspeed/zero1.json",
"init_type": "reset_params",
"load_train_data_from_disk": true
}