{
 "proj_name": "SmolLM2-135M-Instruct-full-train",
 "learning_rate": 4e-06,
 "adam_epsilon": 1e-08,
 "batch_size": 4,
 "max_length": 8192,
 "num_train_epochs": 1,
 "train_data_path": "full-p2l-data-01082025",
 "val_data_path": "p2el/canonical_bt_val_data_11092024",
 "output_dir": "training_outputs",
 "pretrain_model_name": "HuggingFaceTB/SmolLM2-135M-Instruct",
 "gradient_accumulation_steps": 16,
 "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
 "model_type": "llama",
 "head_type": "bt",
 "loss_type": "bt_tie",
 "weighted_loss": false,
 "deepspeed_config_path": "deepspeed/zero1.json",
 "init_type": "reset_params",
 "load_train_data_from_disk": true
}
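
For context, batch_size 4 combined with gradient_accumulation_steps 16 gives an effective batch of 64 sequences per optimizer step, and chat_template is a Jinja string that a Hugging Face tokenizer can apply directly. Below is a minimal sketch, not part of this repo, showing how the config could be loaded and the template rendered with the base model's tokenizer; the local filename training_config.json and the example message are assumptions for illustration.

    # Sketch: load the training config and render a prompt with its chat template.
    import json

    from transformers import AutoTokenizer

    # Hypothetical local path for the JSON shown above.
    with open("training_config.json") as f:
        cfg = json.load(f)

    # "HuggingFaceTB/SmolLM2-135M-Instruct" per pretrain_model_name in the config.
    tokenizer = AutoTokenizer.from_pretrained(cfg["pretrain_model_name"])

    messages = [
        {"role": "user", "content": "Which response is better?"},  # example only
    ]

    # Passing chat_template explicitly makes the tokenizer use the config's
    # template instead of the one bundled with the model.
    prompt = tokenizer.apply_chat_template(
        messages,
        chat_template=cfg["chat_template"],
        add_generation_prompt=True,
        tokenize=False,
    )
    print(prompt)
    # <|im_start|>user
    # Which response is better?<|im_end|>
    # <|im_start|>assistant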