|
{ |
|
"best_metric": 3.468397378921509, |
|
"best_model_checkpoint": "/datadrive/disk1/pierpaolo/llm/itlangadapt/bloom-1b3_it/checkpoint-50000", |
|
"epoch": 5.236680193742637, |
|
"global_step": 50000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00098, |
|
"loss": 3.4734, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00096, |
|
"loss": 3.466, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00094, |
|
"loss": 3.461, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00092, |
|
"loss": 3.4725, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0009000000000000001, |
|
"loss": 3.4614, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_loss": 3.469589948654175, |
|
"eval_runtime": 358.4452, |
|
"eval_samples_per_second": 10.624, |
|
"eval_steps_per_second": 5.312, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00088, |
|
"loss": 3.4653, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00086, |
|
"loss": 3.4618, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.00084, |
|
"loss": 3.4707, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.00082002, |
|
"loss": 3.4706, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.0008000199999999999, |
|
"loss": 3.4694, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"eval_loss": 3.4690916538238525, |
|
"eval_runtime": 358.2981, |
|
"eval_samples_per_second": 10.628, |
|
"eval_steps_per_second": 5.314, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.0007800200000000001, |
|
"loss": 3.4686, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.00076002, |
|
"loss": 3.4636, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.0007400400000000001, |
|
"loss": 3.463, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.00072006, |
|
"loss": 3.4694, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.0007000600000000001, |
|
"loss": 3.4709, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"eval_loss": 3.4688496589660645, |
|
"eval_runtime": 358.397, |
|
"eval_samples_per_second": 10.625, |
|
"eval_steps_per_second": 5.313, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.00068006, |
|
"loss": 3.4627, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 0.0006600800000000001, |
|
"loss": 3.4658, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.00064008, |
|
"loss": 3.4661, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.00062008, |
|
"loss": 3.4615, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 0.0006001, |
|
"loss": 3.481, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"eval_loss": 3.468804359436035, |
|
"eval_runtime": 359.4028, |
|
"eval_samples_per_second": 10.595, |
|
"eval_steps_per_second": 5.298, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 0.0005801, |
|
"loss": 3.465, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 0.00056012, |
|
"loss": 3.4656, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 0.00054012, |
|
"loss": 3.472, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 0.00052012, |
|
"loss": 3.4574, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 0.0005001200000000001, |
|
"loss": 3.4638, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"eval_loss": 3.468665599822998, |
|
"eval_runtime": 358.6249, |
|
"eval_samples_per_second": 10.618, |
|
"eval_steps_per_second": 5.309, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 0.00048012, |
|
"loss": 3.4611, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 0.00046012, |
|
"loss": 3.4631, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 0.00044014, |
|
"loss": 3.4626, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 0.00042014, |
|
"loss": 3.469, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 0.00040018, |
|
"loss": 3.4704, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"eval_loss": 3.468623399734497, |
|
"eval_runtime": 359.7336, |
|
"eval_samples_per_second": 10.586, |
|
"eval_steps_per_second": 5.293, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 0.00038018000000000004, |
|
"loss": 3.4659, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 0.00036018, |
|
"loss": 3.4706, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 0.00034018, |
|
"loss": 3.4659, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 0.00032018000000000004, |
|
"loss": 3.4669, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 0.00030018, |
|
"loss": 3.4617, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"eval_loss": 3.4684813022613525, |
|
"eval_runtime": 361.1986, |
|
"eval_samples_per_second": 10.543, |
|
"eval_steps_per_second": 5.271, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 0.00028020000000000003, |
|
"loss": 3.4703, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 0.00026024, |
|
"loss": 3.4606, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 0.00024026, |
|
"loss": 3.4549, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 0.00022026, |
|
"loss": 3.4701, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 0.00020026, |
|
"loss": 3.4636, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"eval_loss": 3.468475103378296, |
|
"eval_runtime": 358.6453, |
|
"eval_samples_per_second": 10.618, |
|
"eval_steps_per_second": 5.309, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 0.00018026, |
|
"loss": 3.4541, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 0.00016026000000000001, |
|
"loss": 3.4724, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 0.00014026, |
|
"loss": 3.4683, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"learning_rate": 0.00012026, |
|
"loss": 3.463, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"learning_rate": 0.00010026, |
|
"loss": 3.4668, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"eval_loss": 3.4684269428253174, |
|
"eval_runtime": 358.1004, |
|
"eval_samples_per_second": 10.634, |
|
"eval_steps_per_second": 5.317, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 4.82, |
|
"learning_rate": 8.028000000000001e-05, |
|
"loss": 3.4696, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 6.028e-05, |
|
"loss": 3.4605, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 4.028000000000001e-05, |
|
"loss": 3.4645, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"learning_rate": 2.028e-05, |
|
"loss": 3.4704, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"learning_rate": 2.7999999999999997e-07, |
|
"loss": 3.4623, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"eval_loss": 3.468397378921509, |
|
"eval_runtime": 358.1322, |
|
"eval_samples_per_second": 10.633, |
|
"eval_steps_per_second": 5.316, |
|
"step": 50000 |
|
} |
|
], |
|
"max_steps": 50000, |
|
"num_train_epochs": 6, |
|
"total_flos": 2.9704946454626304e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|