|
{ |
|
"best_metric": 1.0593422651290894, |
|
"best_model_checkpoint": "output/queen/checkpoint-680", |
|
"epoch": 10.0, |
|
"global_step": 680, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00013537785052914354, |
|
"loss": 3.396, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0001300082017869573, |
|
"loss": 3.2987, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0001213763104094255, |
|
"loss": 2.8576, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.000109940736055617, |
|
"loss": 2.9968, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 9.630898093421199e-05, |
|
"loss": 2.9324, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 8.120521692221673e-05, |
|
"loss": 2.9931, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 6.543181473690228e-05, |
|
"loss": 2.8919, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.982671888105512e-05, |
|
"loss": 2.7977, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.5218932770313667e-05, |
|
"loss": 2.9689, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 2.2384478845846175e-05, |
|
"loss": 2.756, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.200517324255647e-05, |
|
"loss": 2.8716, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.6324050628611986e-06, |
|
"loss": 2.9236, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 6.578444419609313e-07, |
|
"loss": 2.8184, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 2.78603196144104, |
|
"eval_runtime": 4.3884, |
|
"eval_samples_per_second": 21.192, |
|
"eval_steps_per_second": 2.734, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 2.926355061606279e-07, |
|
"loss": 2.8201, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.55617957525975e-06, |
|
"loss": 2.7876, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 1.0275104488948473e-05, |
|
"loss": 2.8738, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 2.009247481060283e-05, |
|
"loss": 2.6439, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 3.2486753626613365e-05, |
|
"loss": 2.5993, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 4.679950861668151e-05, |
|
"loss": 2.7072, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 6.227039054081748e-05, |
|
"loss": 2.7465, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 7.807752594969911e-05, |
|
"loss": 2.5926, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 9.338117830043867e-05, |
|
"loss": 2.7178, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.00010736835802938978, |
|
"loss": 2.7027, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.00011929601172133719, |
|
"loss": 2.4457, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.00012853049599834097, |
|
"loss": 2.6806, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 0.00013458123912165538, |
|
"loss": 2.4508, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.00013712680207144277, |
|
"loss": 2.6492, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 2.3717236518859863, |
|
"eval_runtime": 4.3106, |
|
"eval_samples_per_second": 22.039, |
|
"eval_steps_per_second": 2.784, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 2.222139835357666, |
|
"eval_runtime": 3.7161, |
|
"eval_samples_per_second": 23.412, |
|
"eval_steps_per_second": 2.96, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 0.00013691577939766304, |
|
"loss": 2.3223, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 0.00013374528334456193, |
|
"loss": 2.4227, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 0.00012721317115188912, |
|
"loss": 2.3956, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 0.00011765651179294606, |
|
"loss": 2.3273, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 0.0001055684464238085, |
|
"loss": 2.4048, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 9.157274139492967e-05, |
|
"loss": 2.4168, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 7.639160076306266e-05, |
|
"loss": 2.4496, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 6.0808399236937385e-05, |
|
"loss": 2.0669, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 4.5627258605070364e-05, |
|
"loss": 2.2692, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 3.1631553576191535e-05, |
|
"loss": 2.1242, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 1.954348820705391e-05, |
|
"loss": 2.3064, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 9.986828848110892e-06, |
|
"loss": 2.146, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 3.454716655438096e-06, |
|
"loss": 2.3787, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 2.8422060233695394e-07, |
|
"loss": 2.2533, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 2.043508768081665, |
|
"eval_runtime": 3.8679, |
|
"eval_samples_per_second": 22.493, |
|
"eval_steps_per_second": 2.844, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 6.389441019077102e-07, |
|
"loss": 2.1995, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 4.500582770777967e-06, |
|
"loss": 2.2239, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 1.166986896886688e-05, |
|
"loss": 2.1124, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 2.177685437520025e-05, |
|
"loss": 2.2442, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 3.4300000000000014e-05, |
|
"loss": 2.0765, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 4.859308855339902e-05, |
|
"loss": 2.042, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 6.391857044318346e-05, |
|
"loss": 2.0462, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 7.948562268689874e-05, |
|
"loss": 1.9776, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 9.449095682862937e-05, |
|
"loss": 2.1824, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 0.0001081602700970798, |
|
"loss": 1.996, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 0.00011978820084915117, |
|
"loss": 2.1329, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 0.00012877472652481797, |
|
"loss": 2.1854, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 0.00013465612591205902, |
|
"loss": 1.8201, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 0.00013712890801216552, |
|
"loss": 1.8969, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 1.9531660079956055, |
|
"eval_runtime": 3.8382, |
|
"eval_samples_per_second": 22.667, |
|
"eval_steps_per_second": 2.866, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 0.0001360654727320747, |
|
"loss": 1.817, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 0.0001315206952832741, |
|
"loss": 1.6319, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 0.00012372909452021153, |
|
"loss": 1.7738, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 0.00011309273133714503, |
|
"loss": 1.7757, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 0.0001001604615883571, |
|
"loss": 1.9699, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"learning_rate": 8.5599614119113e-05, |
|
"loss": 1.9681, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 7.016155537292879e-05, |
|
"loss": 1.9245, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 5.464291750458938e-05, |
|
"loss": 1.7497, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"learning_rate": 3.98444906994169e-05, |
|
"loss": 1.7975, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"learning_rate": 2.6529900930537204e-05, |
|
"loss": 1.6675, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 1.5386205457676833e-05, |
|
"loss": 1.9876, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 6.988439412050469e-06, |
|
"loss": 1.691, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"learning_rate": 1.7699429285993718e-06, |
|
"loss": 1.8689, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 0.0, |
|
"loss": 1.9327, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 1.8510653972625732, |
|
"eval_runtime": 3.8275, |
|
"eval_samples_per_second": 22.73, |
|
"eval_steps_per_second": 2.874, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"learning_rate": 1.7699429285993566e-06, |
|
"loss": 1.5991, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 5.14, |
|
"learning_rate": 6.988439412050431e-06, |
|
"loss": 1.6155, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"learning_rate": 1.538620545767678e-05, |
|
"loss": 1.6707, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 5.29, |
|
"learning_rate": 2.6529900930537035e-05, |
|
"loss": 1.7262, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 5.36, |
|
"learning_rate": 3.984449069941682e-05, |
|
"loss": 1.7087, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 5.43, |
|
"learning_rate": 5.4642917504589295e-05, |
|
"loss": 1.7766, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 5.51, |
|
"learning_rate": 7.016155537292883e-05, |
|
"loss": 1.5992, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 5.58, |
|
"learning_rate": 8.559961411911291e-05, |
|
"loss": 1.6561, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"learning_rate": 0.00010016046158835702, |
|
"loss": 1.6108, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 5.72, |
|
"learning_rate": 0.00011309273133714504, |
|
"loss": 1.6236, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"learning_rate": 0.0001237290945202115, |
|
"loss": 1.6319, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 5.87, |
|
"learning_rate": 0.00013152069528327408, |
|
"loss": 1.4268, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"learning_rate": 0.00013606547273207472, |
|
"loss": 1.7278, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 1.8254233598709106, |
|
"eval_runtime": 3.854, |
|
"eval_samples_per_second": 22.574, |
|
"eval_steps_per_second": 2.854, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 0.00013712890801216552, |
|
"loss": 1.8762, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"learning_rate": 0.00013465612591205902, |
|
"loss": 1.4495, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 6.16, |
|
"learning_rate": 0.00012877472652481797, |
|
"loss": 1.5224, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 6.23, |
|
"learning_rate": 0.00011978820084915123, |
|
"loss": 1.5038, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 6.3, |
|
"learning_rate": 0.00010816027009708009, |
|
"loss": 1.4097, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 6.38, |
|
"learning_rate": 9.449095682862935e-05, |
|
"loss": 1.3203, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"learning_rate": 7.948562268689883e-05, |
|
"loss": 1.3634, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"learning_rate": 6.39185704431838e-05, |
|
"loss": 1.4181, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 6.59, |
|
"learning_rate": 4.859308855339899e-05, |
|
"loss": 1.5025, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"learning_rate": 3.429999999999998e-05, |
|
"loss": 1.5881, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"learning_rate": 2.1776854375200487e-05, |
|
"loss": 1.5042, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 6.81, |
|
"learning_rate": 1.1669868968866859e-05, |
|
"loss": 1.4229, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 6.88, |
|
"learning_rate": 4.500582770777952e-06, |
|
"loss": 1.5551, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"learning_rate": 6.389441019077407e-07, |
|
"loss": 1.6253, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 1.787222146987915, |
|
"eval_runtime": 3.8357, |
|
"eval_samples_per_second": 22.681, |
|
"eval_steps_per_second": 2.868, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"learning_rate": 2.8422060233696156e-07, |
|
"loss": 1.5683, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"learning_rate": 3.4547166554380356e-06, |
|
"loss": 1.1749, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 7.17, |
|
"learning_rate": 9.986828848110785e-06, |
|
"loss": 1.2655, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"learning_rate": 1.9543488207053935e-05, |
|
"loss": 1.3456, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 7.32, |
|
"learning_rate": 3.163155357619136e-05, |
|
"loss": 1.2074, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 7.39, |
|
"learning_rate": 4.5627258605070174e-05, |
|
"loss": 1.3068, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 7.46, |
|
"learning_rate": 6.080839923693748e-05, |
|
"loss": 1.1119, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 7.54, |
|
"learning_rate": 7.639160076306239e-05, |
|
"loss": 1.3026, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 7.61, |
|
"learning_rate": 9.15727413949297e-05, |
|
"loss": 1.4164, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 7.68, |
|
"learning_rate": 0.00010556844642380854, |
|
"loss": 1.6188, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 7.75, |
|
"learning_rate": 0.00011765651179294598, |
|
"loss": 1.438, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 7.83, |
|
"learning_rate": 0.00012721317115188917, |
|
"loss": 1.267, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 7.9, |
|
"learning_rate": 0.00013374528334456193, |
|
"loss": 1.2976, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 7.97, |
|
"learning_rate": 0.00013691577939766304, |
|
"loss": 1.4411, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 1.795061707496643, |
|
"eval_runtime": 3.8467, |
|
"eval_samples_per_second": 22.617, |
|
"eval_steps_per_second": 2.86, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"learning_rate": 0.0001365610558980923, |
|
"loss": 1.266, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 8.12, |
|
"learning_rate": 0.000132699417229222, |
|
"loss": 1.2792, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 8.19, |
|
"learning_rate": 0.0001255301310311332, |
|
"loss": 1.2561, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 8.26, |
|
"learning_rate": 0.00011542314562479977, |
|
"loss": 1.2462, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"learning_rate": 0.00010289999999999993, |
|
"loss": 1.2013, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 8.41, |
|
"learning_rate": 8.860691144660113e-05, |
|
"loss": 1.2052, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 8.48, |
|
"learning_rate": 7.328142955681657e-05, |
|
"loss": 1.3885, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 8.55, |
|
"learning_rate": 5.771437731310106e-05, |
|
"loss": 1.0822, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 8.62, |
|
"learning_rate": 4.270904317137078e-05, |
|
"loss": 1.3064, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 8.7, |
|
"learning_rate": 2.9039729902920224e-05, |
|
"loss": 1.2009, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 8.77, |
|
"learning_rate": 1.7411799150848703e-05, |
|
"loss": 1.2057, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 8.84, |
|
"learning_rate": 8.425273475182102e-06, |
|
"loss": 1.1498, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 8.91, |
|
"learning_rate": 2.5438740879410024e-06, |
|
"loss": 1.0197, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"learning_rate": 7.109198783448199e-08, |
|
"loss": 1.1622, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 1.775342345237732, |
|
"eval_runtime": 3.8327, |
|
"eval_samples_per_second": 22.699, |
|
"eval_steps_per_second": 2.87, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 9.19, |
|
"learning_rate": 1.2005173242556402e-05, |
|
"loss": 1.3691, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 9.26, |
|
"learning_rate": 2.2384478845846314e-05, |
|
"loss": 1.5005, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 9.34, |
|
"learning_rate": 3.5218932770313436e-05, |
|
"loss": 1.4065, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 9.41, |
|
"learning_rate": 4.982671888105515e-05, |
|
"loss": 1.3225, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 9.49, |
|
"learning_rate": 6.543181473690211e-05, |
|
"loss": 1.0367, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 9.56, |
|
"learning_rate": 8.120521692221662e-05, |
|
"loss": 1.1784, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 9.63, |
|
"learning_rate": 9.630898093421192e-05, |
|
"loss": 1.2778, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 9.71, |
|
"learning_rate": 0.00010994073605561679, |
|
"loss": 1.3754, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 9.78, |
|
"learning_rate": 0.00012137631040942535, |
|
"loss": 1.1983, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 9.85, |
|
"learning_rate": 0.00013000820178695732, |
|
"loss": 1.3077, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 9.93, |
|
"learning_rate": 0.0001353778505291435, |
|
"loss": 1.4969, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 0.0001372, |
|
"loss": 1.3263, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 1.0593422651290894, |
|
"eval_runtime": 1.2161, |
|
"eval_samples_per_second": 75.649, |
|
"eval_steps_per_second": 9.867, |
|
"step": 680 |
|
} |
|
], |
|
"max_steps": 680, |
|
"num_train_epochs": 10, |
|
"total_flos": 704182026240000.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|