|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"global_step": 2500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.9632e-05, |
|
"loss": 0.9899, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9232e-05, |
|
"loss": 0.9949, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.8832000000000002e-05, |
|
"loss": 0.828, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.8432000000000002e-05, |
|
"loss": 0.8372, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.8032e-05, |
|
"loss": 0.8409, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_accuracy": 0.6220472440944882, |
|
"eval_loss": 0.8243474364280701, |
|
"eval_runtime": 435.8774, |
|
"eval_samples_per_second": 2.04, |
|
"eval_steps_per_second": 2.04, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.7632000000000003e-05, |
|
"loss": 0.694, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.7232000000000004e-05, |
|
"loss": 0.6918, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.6832e-05, |
|
"loss": 0.6794, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.6432e-05, |
|
"loss": 0.6338, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.6032e-05, |
|
"loss": 0.6288, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_accuracy": 0.671541057367829, |
|
"eval_loss": 0.7539446949958801, |
|
"eval_runtime": 434.4073, |
|
"eval_samples_per_second": 2.046, |
|
"eval_steps_per_second": 2.046, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.5632000000000002e-05, |
|
"loss": 0.6622, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.5232000000000003e-05, |
|
"loss": 0.773, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.4832000000000001e-05, |
|
"loss": 0.6051, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.4432000000000002e-05, |
|
"loss": 0.7805, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.4032e-05, |
|
"loss": 0.5882, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_accuracy": 0.7075365579302587, |
|
"eval_loss": 0.6791747808456421, |
|
"eval_runtime": 433.8268, |
|
"eval_samples_per_second": 2.049, |
|
"eval_steps_per_second": 2.049, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.3632000000000001e-05, |
|
"loss": 0.5672, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.3232e-05, |
|
"loss": 0.6807, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.2832e-05, |
|
"loss": 0.6796, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.2432000000000002e-05, |
|
"loss": 0.6922, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.2032000000000001e-05, |
|
"loss": 0.7671, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_accuracy": 0.7334083239595051, |
|
"eval_loss": 0.6129724383354187, |
|
"eval_runtime": 433.287, |
|
"eval_samples_per_second": 2.052, |
|
"eval_steps_per_second": 2.052, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.1632000000000001e-05, |
|
"loss": 0.645, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.1232e-05, |
|
"loss": 0.5891, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.0832e-05, |
|
"loss": 0.6426, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.0432e-05, |
|
"loss": 0.567, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.0032000000000002e-05, |
|
"loss": 0.5782, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_accuracy": 0.7255343082114736, |
|
"eval_loss": 0.6114887595176697, |
|
"eval_runtime": 433.3273, |
|
"eval_samples_per_second": 2.052, |
|
"eval_steps_per_second": 2.052, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.632e-06, |
|
"loss": 0.5736, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.232e-06, |
|
"loss": 0.6849, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.832000000000001e-06, |
|
"loss": 0.5305, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 8.432e-06, |
|
"loss": 0.7265, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 8.032e-06, |
|
"loss": 0.5691, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_accuracy": 0.7412823397075365, |
|
"eval_loss": 0.5794617533683777, |
|
"eval_runtime": 433.4136, |
|
"eval_samples_per_second": 2.051, |
|
"eval_steps_per_second": 2.051, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 7.632e-06, |
|
"loss": 0.519, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 7.232e-06, |
|
"loss": 0.5378, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 6.832000000000001e-06, |
|
"loss": 0.5982, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 6.432e-06, |
|
"loss": 0.7027, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 6.032e-06, |
|
"loss": 0.6579, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_accuracy": 0.7469066366704162, |
|
"eval_loss": 0.5774183869361877, |
|
"eval_runtime": 433.4068, |
|
"eval_samples_per_second": 2.051, |
|
"eval_steps_per_second": 2.051, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 5.6320000000000005e-06, |
|
"loss": 0.5044, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 5.232e-06, |
|
"loss": 0.6482, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.8320000000000005e-06, |
|
"loss": 0.5406, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.432e-06, |
|
"loss": 0.5372, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.0320000000000005e-06, |
|
"loss": 0.6107, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_accuracy": 0.7401574803149606, |
|
"eval_loss": 0.5690832734107971, |
|
"eval_runtime": 433.3967, |
|
"eval_samples_per_second": 2.051, |
|
"eval_steps_per_second": 2.051, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.6320000000000005e-06, |
|
"loss": 0.4043, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.2400000000000003e-06, |
|
"loss": 0.5344, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.84e-06, |
|
"loss": 0.7056, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.4400000000000004e-06, |
|
"loss": 0.5719, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.04e-06, |
|
"loss": 0.6255, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_accuracy": 0.7435320584926884, |
|
"eval_loss": 0.570974588394165, |
|
"eval_runtime": 433.4106, |
|
"eval_samples_per_second": 2.051, |
|
"eval_steps_per_second": 2.051, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.6400000000000002e-06, |
|
"loss": 0.5958, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.2400000000000002e-06, |
|
"loss": 0.5984, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 8.480000000000001e-07, |
|
"loss": 0.6103, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.4800000000000004e-07, |
|
"loss": 0.612, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.8e-08, |
|
"loss": 0.7034, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7435320584926884, |
|
"eval_loss": 0.5713425874710083, |
|
"eval_runtime": 434.3078, |
|
"eval_samples_per_second": 2.047, |
|
"eval_steps_per_second": 2.047, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 2500, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6507886672973633, |
|
"train_runtime": 17457.236, |
|
"train_samples_per_second": 0.573, |
|
"train_steps_per_second": 0.143 |
|
} |
|
], |
|
"max_steps": 2500, |
|
"num_train_epochs": 1, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|