Safetensors
llama
p2l-135m-grk-01112025 / training_config.json
evanfrick's picture
commit files to HF hub
f8e9886
raw
history blame
847 Bytes
{
"proj_name": "SmolLM2-135M-Instruct-bag-full-train-half-batch",
"learning_rate": 8e-06,
"adam_epsilon": 1e-08,
"batch_size": 4,
"max_length": 8192,
"num_train_epochs": 1,
"train_data_path": "full-p2l-bag-data-01082025",
"val_data_path": "p2el/canonical_bt_val_data_11092024",
"output_dir": "training_outputs",
"pretrain_model_name": "HuggingFaceTB/SmolLM2-135M-Instruct",
"gradient_accumulation_steps": 16,
"chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
"model_type": "llama",
"head_type": "rk",
"loss_type": "bag",
"weighted_loss": false,
"deepspeed_config_path": "deepspeed/zero1.json",
"init_type": "reset_params",
"load_train_data_from_disk": true
}