{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.5626695604991863, |
|
"eval_steps": 500, |
|
"global_step": 4500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0017362995116657625, |
|
"grad_norm": 0.353515625, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5893, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.003472599023331525, |
|
"grad_norm": 0.1474609375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8982, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.005208898534997287, |
|
"grad_norm": 0.0986328125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.753, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.00694519804666305, |
|
"grad_norm": 0.0908203125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7282, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.008681497558328812, |
|
"grad_norm": 0.09375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7196, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.010417797069994574, |
|
"grad_norm": 0.10791015625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6667, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.012154096581660336, |
|
"grad_norm": 0.1044921875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6529, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.0138903960933261, |
|
"grad_norm": 0.07861328125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6429, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01562669560499186, |
|
"grad_norm": 0.080078125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6237, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.017362995116657624, |
|
"grad_norm": 0.11279296875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6071, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.019099294628323386, |
|
"grad_norm": 0.109375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6835, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.020835594139989148, |
|
"grad_norm": 0.09521484375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6877, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.02257189365165491, |
|
"grad_norm": 0.0966796875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6685, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.02430819316332067, |
|
"grad_norm": 0.1220703125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6585, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.026044492674986434, |
|
"grad_norm": 0.08984375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6389, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.0277807921866522, |
|
"grad_norm": 0.07763671875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6316, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.02951709169831796, |
|
"grad_norm": 0.087890625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.636, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.03125339120998372, |
|
"grad_norm": 0.0859375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6358, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.032989690721649485, |
|
"grad_norm": 0.0849609375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6146, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.03472599023331525, |
|
"grad_norm": 0.087890625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6141, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03646228974498101, |
|
"grad_norm": 0.09765625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.686, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.03819858925664677, |
|
"grad_norm": 0.095703125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6513, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.03993488876831253, |
|
"grad_norm": 0.11376953125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6712, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.041671188279978295, |
|
"grad_norm": 0.0966796875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6164, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.04340748779164406, |
|
"grad_norm": 0.09619140625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6196, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.04514378730330982, |
|
"grad_norm": 0.08056640625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6477, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.04688008681497558, |
|
"grad_norm": 0.0751953125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6087, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.04861638632664134, |
|
"grad_norm": 0.07763671875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6002, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.050352685838307105, |
|
"grad_norm": 0.10302734375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5885, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.05208898534997287, |
|
"grad_norm": 0.1005859375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5707, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.05382528486163863, |
|
"grad_norm": 0.08203125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7143, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.0555615843733044, |
|
"grad_norm": 0.142578125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6974, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.05729788388497016, |
|
"grad_norm": 0.08837890625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6538, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.05903418339663592, |
|
"grad_norm": 0.09423828125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6114, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.060770482908301685, |
|
"grad_norm": 0.10791015625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6232, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.06250678241996745, |
|
"grad_norm": 0.08349609375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6102, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.06424308193163321, |
|
"grad_norm": 0.0830078125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6179, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.06597938144329897, |
|
"grad_norm": 0.083984375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.595, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.06771568095496473, |
|
"grad_norm": 0.0908203125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5864, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.0694519804666305, |
|
"grad_norm": 0.0947265625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5673, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.07118827997829626, |
|
"grad_norm": 0.08056640625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6848, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.07292457948996202, |
|
"grad_norm": 0.1083984375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6481, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.07466087900162778, |
|
"grad_norm": 0.07421875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6258, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.07639717851329354, |
|
"grad_norm": 0.0859375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6184, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.0781334780249593, |
|
"grad_norm": 0.07568359375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6172, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.07986977753662507, |
|
"grad_norm": 0.09033203125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6525, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.08160607704829083, |
|
"grad_norm": 0.078125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5976, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.08334237655995659, |
|
"grad_norm": 0.0947265625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5915, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.08507867607162235, |
|
"grad_norm": 0.09228515625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6034, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.08681497558328811, |
|
"grad_norm": 0.103515625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5716, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.08855127509495388, |
|
"grad_norm": 0.09228515625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6715, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.09028757460661964, |
|
"grad_norm": 0.0966796875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6533, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.0920238741182854, |
|
"grad_norm": 0.07763671875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.624, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.09376017362995116, |
|
"grad_norm": 0.0810546875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6412, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.09549647314161692, |
|
"grad_norm": 0.076171875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6201, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.09723277265328269, |
|
"grad_norm": 0.0810546875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5984, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.09896907216494845, |
|
"grad_norm": 0.08251953125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6205, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.10070537167661421, |
|
"grad_norm": 0.0849609375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6026, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.10244167118827997, |
|
"grad_norm": 0.0888671875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5764, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.10417797069994574, |
|
"grad_norm": 0.08740234375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5867, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.1059142702116115, |
|
"grad_norm": 0.07861328125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6639, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.10765056972327726, |
|
"grad_norm": 0.10888671875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6293, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.10938686923494302, |
|
"grad_norm": 0.0830078125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6242, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.1111231687466088, |
|
"grad_norm": 0.0908203125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6161, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.11285946825827456, |
|
"grad_norm": 0.08642578125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6206, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.11459576776994032, |
|
"grad_norm": 0.08544921875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5803, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.11633206728160608, |
|
"grad_norm": 0.09130859375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5955, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.11806836679327185, |
|
"grad_norm": 0.09033203125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6019, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.11980466630493761, |
|
"grad_norm": 0.087890625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5881, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.12154096581660337, |
|
"grad_norm": 0.08984375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5668, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.12327726532826913, |
|
"grad_norm": 0.08642578125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6715, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.1250135648399349, |
|
"grad_norm": 0.12353515625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6296, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.12674986435160066, |
|
"grad_norm": 0.0771484375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6437, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.12848616386326642, |
|
"grad_norm": 0.0888671875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6264, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.13022246337493218, |
|
"grad_norm": 0.09375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5922, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.13195876288659794, |
|
"grad_norm": 0.08251953125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6201, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.1336950623982637, |
|
"grad_norm": 0.0859375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.612, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.13543136190992947, |
|
"grad_norm": 0.08544921875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6103, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.13716766142159523, |
|
"grad_norm": 0.0927734375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5849, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.138903960933261, |
|
"grad_norm": 0.0859375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5548, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.14064026044492675, |
|
"grad_norm": 0.08642578125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.667, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.1423765599565925, |
|
"grad_norm": 0.1025390625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.646, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.14411285946825828, |
|
"grad_norm": 0.08203125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.618, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.14584915897992404, |
|
"grad_norm": 0.08203125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.616, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.1475854584915898, |
|
"grad_norm": 0.0849609375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6078, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.14932175800325556, |
|
"grad_norm": 0.0849609375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5866, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.15105805751492132, |
|
"grad_norm": 0.0908203125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6054, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.15279435702658709, |
|
"grad_norm": 0.087890625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5834, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.15453065653825285, |
|
"grad_norm": 0.08349609375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.577, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.1562669560499186, |
|
"grad_norm": 0.099609375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5994, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.15800325556158437, |
|
"grad_norm": 0.0771484375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6595, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.15973955507325013, |
|
"grad_norm": 0.0966796875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6378, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.1614758545849159, |
|
"grad_norm": 0.09326171875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6261, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.16321215409658166, |
|
"grad_norm": 0.08837890625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6223, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.16494845360824742, |
|
"grad_norm": 0.08251953125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5911, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.16668475311991318, |
|
"grad_norm": 0.0849609375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5881, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.16842105263157894, |
|
"grad_norm": 0.09423828125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.609, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.1701573521432447, |
|
"grad_norm": 0.08544921875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5948, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.17189365165491047, |
|
"grad_norm": 0.09326171875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5808, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.17362995116657623, |
|
"grad_norm": 0.09375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5598, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.175366250678242, |
|
"grad_norm": 0.09521484375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6422, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.17710255018990775, |
|
"grad_norm": 0.1005859375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.629, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.17883884970157352, |
|
"grad_norm": 0.083984375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6156, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.18057514921323928, |
|
"grad_norm": 0.07373046875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5992, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.18231144872490504, |
|
"grad_norm": 0.0810546875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6237, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.1840477482365708, |
|
"grad_norm": 0.0849609375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5895, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.18578404774823656, |
|
"grad_norm": 0.09130859375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5817, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.18752034725990233, |
|
"grad_norm": 0.08203125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5749, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.1892566467715681, |
|
"grad_norm": 0.0859375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5926, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.19099294628323385, |
|
"grad_norm": 0.12109375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5499, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.1927292457948996, |
|
"grad_norm": 0.099609375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6498, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.19446554530656537, |
|
"grad_norm": 0.09521484375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6104, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.19620184481823114, |
|
"grad_norm": 0.0859375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.638, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.1979381443298969, |
|
"grad_norm": 0.0791015625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6012, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.19967444384156266, |
|
"grad_norm": 0.083984375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5884, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.20141074335322842, |
|
"grad_norm": 0.08154296875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5895, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.20314704286489418, |
|
"grad_norm": 0.091796875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5914, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.20488334237655995, |
|
"grad_norm": 0.08203125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5623, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.2066196418882257, |
|
"grad_norm": 0.0849609375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5724, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.20835594139989147, |
|
"grad_norm": 0.10693359375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.564, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.21009224091155723, |
|
"grad_norm": 0.09033203125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6637, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.211828540423223, |
|
"grad_norm": 0.0888671875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6184, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.21356483993488876, |
|
"grad_norm": 0.07861328125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6122, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.21530113944655452, |
|
"grad_norm": 0.07958984375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5952, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.21703743895822028, |
|
"grad_norm": 0.0849609375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5696, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.21877373846988604, |
|
"grad_norm": 0.07958984375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5904, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.2205100379815518, |
|
"grad_norm": 0.083984375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5874, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.2222463374932176, |
|
"grad_norm": 0.07568359375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.558, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.22398263700488336, |
|
"grad_norm": 0.08544921875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5814, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.22571893651654912, |
|
"grad_norm": 0.10107421875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5751, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.22745523602821488, |
|
"grad_norm": 0.09521484375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6308, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.22919153553988064, |
|
"grad_norm": 0.087890625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6092, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.2309278350515464, |
|
"grad_norm": 0.08544921875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.605, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.23266413456321217, |
|
"grad_norm": 0.076171875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5918, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.23440043407487793, |
|
"grad_norm": 0.08251953125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5898, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.2361367335865437, |
|
"grad_norm": 0.08349609375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5725, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.23787303309820945, |
|
"grad_norm": 0.07763671875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5886, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.23960933260987521, |
|
"grad_norm": 0.0859375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5835, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.24134563212154098, |
|
"grad_norm": 0.0869140625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5814, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.24308193163320674, |
|
"grad_norm": 0.09423828125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5461, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.2448182311448725, |
|
"grad_norm": 0.08056640625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6425, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.24655453065653826, |
|
"grad_norm": 0.0849609375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6114, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.24829083016820402, |
|
"grad_norm": 0.0830078125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6252, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.2500271296798698, |
|
"grad_norm": 0.0771484375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6298, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.25176342919153555, |
|
"grad_norm": 0.072265625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5843, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.2534997287032013, |
|
"grad_norm": 0.07470703125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5662, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.2552360282148671, |
|
"grad_norm": 0.0888671875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5884, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.25697232772653283, |
|
"grad_norm": 0.08203125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.558, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.2587086272381986, |
|
"grad_norm": 0.08447265625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5606, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.26044492674986436, |
|
"grad_norm": 0.0986328125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5664, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.2621812262615301, |
|
"grad_norm": 0.0869140625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6668, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.2639175257731959, |
|
"grad_norm": 0.08056640625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6097, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.26565382528486164, |
|
"grad_norm": 0.08984375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5995, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.2673901247965274, |
|
"grad_norm": 0.08642578125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5869, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.26912642430819317, |
|
"grad_norm": 0.09130859375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5828, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.27086272381985893, |
|
"grad_norm": 0.080078125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5629, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.2725990233315247, |
|
"grad_norm": 0.09326171875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5884, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.27433532284319045, |
|
"grad_norm": 0.0830078125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5873, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.2760716223548562, |
|
"grad_norm": 0.07958984375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5427, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.277807921866522, |
|
"grad_norm": 0.107421875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5518, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.27954422137818774, |
|
"grad_norm": 0.0859375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6357, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.2812805208898535, |
|
"grad_norm": 0.0869140625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.606, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.28301682040151926, |
|
"grad_norm": 0.0908203125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6212, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.284753119913185, |
|
"grad_norm": 0.07958984375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5825, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.2864894194248508, |
|
"grad_norm": 0.08203125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6058, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.28822571893651655, |
|
"grad_norm": 0.07763671875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5957, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.2899620184481823, |
|
"grad_norm": 0.08544921875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5706, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.2916983179598481, |
|
"grad_norm": 0.07763671875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5912, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.29343461747151384, |
|
"grad_norm": 0.08837890625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5385, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.2951709169831796, |
|
"grad_norm": 0.10986328125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5412, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.29690721649484536, |
|
"grad_norm": 0.10009765625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6608, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.2986435160065111, |
|
"grad_norm": 0.08447265625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6065, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.3003798155181769, |
|
"grad_norm": 0.095703125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6268, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.30211611502984265, |
|
"grad_norm": 0.08447265625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6079, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.3038524145415084, |
|
"grad_norm": 0.08447265625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5651, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.30558871405317417, |
|
"grad_norm": 0.07861328125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6071, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.30732501356483993, |
|
"grad_norm": 0.0869140625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5628, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.3090613130765057, |
|
"grad_norm": 0.091796875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.555, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.31079761258817146, |
|
"grad_norm": 0.087890625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5856, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.3125339120998372, |
|
"grad_norm": 0.09912109375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5463, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.314270211611503, |
|
"grad_norm": 0.087890625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6463, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.31600651112316874, |
|
"grad_norm": 0.08056640625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6021, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.3177428106348345, |
|
"grad_norm": 0.0771484375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6048, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.31947911014650027, |
|
"grad_norm": 0.0888671875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5964, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.32121540965816603, |
|
"grad_norm": 0.083984375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5775, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.3229517091698318, |
|
"grad_norm": 0.08935546875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5967, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.32468800868149755, |
|
"grad_norm": 0.08203125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5851, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.3264243081931633, |
|
"grad_norm": 0.0849609375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5709, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.3281606077048291, |
|
"grad_norm": 0.08837890625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5515, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.32989690721649484, |
|
"grad_norm": 0.109375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5503, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.3316332067281606, |
|
"grad_norm": 0.0859375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6643, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.33336950623982636, |
|
"grad_norm": 0.0849609375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6155, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.3351058057514921, |
|
"grad_norm": 0.07861328125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6162, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.3368421052631579, |
|
"grad_norm": 0.0830078125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5876, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.33857840477482365, |
|
"grad_norm": 0.0849609375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.603, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.3403147042864894, |
|
"grad_norm": 0.08056640625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5779, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.3420510037981552, |
|
"grad_norm": 0.09033203125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5622, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.34378730330982094, |
|
"grad_norm": 0.0927734375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5827, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.3455236028214867, |
|
"grad_norm": 0.099609375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5575, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.34725990233315246, |
|
"grad_norm": 0.09228515625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5561, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.3489962018448182, |
|
"grad_norm": 0.0869140625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.624, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.350732501356484, |
|
"grad_norm": 0.08447265625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6126, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.35246880086814975, |
|
"grad_norm": 0.0908203125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6048, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.3542051003798155, |
|
"grad_norm": 0.07861328125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6119, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.35594139989148127, |
|
"grad_norm": 0.08740234375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6042, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.35767769940314703, |
|
"grad_norm": 0.08349609375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6024, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.3594139989148128, |
|
"grad_norm": 0.0908203125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5581, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.36115029842647856, |
|
"grad_norm": 0.07373046875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.568, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.3628865979381443, |
|
"grad_norm": 0.0830078125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5581, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.3646228974498101, |
|
"grad_norm": 0.0986328125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5342, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.36635919696147584, |
|
"grad_norm": 0.0859375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6637, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.3680954964731416, |
|
"grad_norm": 0.080078125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6307, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.36983179598480737, |
|
"grad_norm": 0.09228515625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6055, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 0.3715680954964731, |
|
"grad_norm": 0.078125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5952, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.3733043950081389, |
|
"grad_norm": 0.0791015625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.574, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.37504069451980465, |
|
"grad_norm": 0.08642578125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5837, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.3767769940314704, |
|
"grad_norm": 0.091796875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5858, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.3785132935431362, |
|
"grad_norm": 0.0830078125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5847, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.38024959305480194, |
|
"grad_norm": 0.09521484375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5528, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.3819858925664677, |
|
"grad_norm": 0.1064453125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5499, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.38372219207813346, |
|
"grad_norm": 0.080078125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6284, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.3854584915897992, |
|
"grad_norm": 0.0859375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.592, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.387194791101465, |
|
"grad_norm": 0.083984375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6212, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.38893109061313075, |
|
"grad_norm": 0.07177734375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5682, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.3906673901247965, |
|
"grad_norm": 0.0830078125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5788, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.39240368963646227, |
|
"grad_norm": 0.08984375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5981, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.39413998914812803, |
|
"grad_norm": 0.08349609375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5724, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.3958762886597938, |
|
"grad_norm": 0.08349609375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5745, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.39761258817145956, |
|
"grad_norm": 0.09130859375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5717, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 0.3993488876831253, |
|
"grad_norm": 0.1025390625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5558, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.4010851871947911, |
|
"grad_norm": 0.0830078125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6393, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.40282148670645684, |
|
"grad_norm": 0.0859375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5985, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.4045577862181226, |
|
"grad_norm": 0.0771484375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5995, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 0.40629408572978837, |
|
"grad_norm": 0.0869140625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5989, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.40803038524145413, |
|
"grad_norm": 0.08154296875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5882, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.4097666847531199, |
|
"grad_norm": 0.078125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5707, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.41150298426478565, |
|
"grad_norm": 0.09228515625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5845, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.4132392837764514, |
|
"grad_norm": 0.08447265625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5737, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.4149755832881172, |
|
"grad_norm": 0.08544921875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5532, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 0.41671188279978294, |
|
"grad_norm": 0.10693359375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5316, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.4184481823114487, |
|
"grad_norm": 0.09033203125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6414, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 0.42018448182311446, |
|
"grad_norm": 0.0849609375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6284, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.4219207813347802, |
|
"grad_norm": 0.087890625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6139, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 0.423657080846446, |
|
"grad_norm": 0.0771484375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5638, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.42539338035811175, |
|
"grad_norm": 0.0791015625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5654, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.4271296798697775, |
|
"grad_norm": 0.08544921875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5751, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.4288659793814433, |
|
"grad_norm": 0.08740234375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5615, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 0.43060227889310904, |
|
"grad_norm": 0.08544921875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5688, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.4323385784047748, |
|
"grad_norm": 0.0927734375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5581, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 0.43407487791644056, |
|
"grad_norm": 0.09912109375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5487, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.4358111774281063, |
|
"grad_norm": 0.09228515625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.621, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 0.4375474769397721, |
|
"grad_norm": 0.08447265625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6001, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.43928377645143785, |
|
"grad_norm": 0.087890625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6088, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 0.4410200759631036, |
|
"grad_norm": 0.08154296875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5846, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.44275637547476937, |
|
"grad_norm": 0.083984375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5702, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.4444926749864352, |
|
"grad_norm": 0.0908203125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5899, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.44622897449810095, |
|
"grad_norm": 0.0791015625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5708, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 0.4479652740097667, |
|
"grad_norm": 0.087890625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5482, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.4497015735214325, |
|
"grad_norm": 0.0859375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5552, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 0.45143787303309824, |
|
"grad_norm": 0.09375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5527, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.453174172544764, |
|
"grad_norm": 0.078125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6432, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 0.45491047205642976, |
|
"grad_norm": 0.09130859375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6055, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.4566467715680955, |
|
"grad_norm": 0.08935546875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5956, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 0.4583830710797613, |
|
"grad_norm": 0.07666015625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6115, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.46011937059142705, |
|
"grad_norm": 0.083984375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5875, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.4618556701030928, |
|
"grad_norm": 0.08349609375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5825, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.46359196961475857, |
|
"grad_norm": 0.08349609375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5734, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 0.46532826912642433, |
|
"grad_norm": 0.08251953125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5613, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.4670645686380901, |
|
"grad_norm": 0.08984375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5642, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 0.46880086814975586, |
|
"grad_norm": 0.09912109375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5524, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.4705371676614216, |
|
"grad_norm": 0.0830078125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6477, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 0.4722734671730874, |
|
"grad_norm": 0.078125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6066, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.47400976668475314, |
|
"grad_norm": 0.08642578125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.629, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 0.4757460661964189, |
|
"grad_norm": 0.0771484375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5795, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.47748236570808467, |
|
"grad_norm": 0.091796875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5799, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.47921866521975043, |
|
"grad_norm": 0.0849609375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5747, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.4809549647314162, |
|
"grad_norm": 0.08544921875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5651, |
|
"step": 1385 |
|
}, |
|
{ |
|
"epoch": 0.48269126424308195, |
|
"grad_norm": 0.08447265625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5481, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.4844275637547477, |
|
"grad_norm": 0.08544921875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5468, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 0.4861638632664135, |
|
"grad_norm": 0.0947265625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5365, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.48790016277807924, |
|
"grad_norm": 0.08642578125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6338, |
|
"step": 1405 |
|
}, |
|
{ |
|
"epoch": 0.489636462289745, |
|
"grad_norm": 0.07568359375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6007, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.49137276180141076, |
|
"grad_norm": 0.08154296875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6057, |
|
"step": 1415 |
|
}, |
|
{ |
|
"epoch": 0.4931090613130765, |
|
"grad_norm": 0.076171875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6155, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.4948453608247423, |
|
"grad_norm": 0.078125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.57, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.49658166033640805, |
|
"grad_norm": 0.09228515625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5907, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.4983179598480738, |
|
"grad_norm": 0.07177734375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5359, |
|
"step": 1435 |
|
}, |
|
{ |
|
"epoch": 0.5000542593597396, |
|
"grad_norm": 0.0859375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5629, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.5017905588714053, |
|
"grad_norm": 0.0869140625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5373, |
|
"step": 1445 |
|
}, |
|
{ |
|
"epoch": 0.5035268583830711, |
|
"grad_norm": 0.125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5529, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.5052631578947369, |
|
"grad_norm": 0.080078125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6634, |
|
"step": 1455 |
|
}, |
|
{ |
|
"epoch": 0.5069994574064026, |
|
"grad_norm": 0.08349609375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6184, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.5087357569180684, |
|
"grad_norm": 0.08251953125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5991, |
|
"step": 1465 |
|
}, |
|
{ |
|
"epoch": 0.5104720564297341, |
|
"grad_norm": 0.0859375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5769, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.5122083559413999, |
|
"grad_norm": 0.091796875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5839, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.5139446554530657, |
|
"grad_norm": 0.08544921875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5862, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.5156809549647314, |
|
"grad_norm": 0.0830078125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5564, |
|
"step": 1485 |
|
}, |
|
{ |
|
"epoch": 0.5174172544763972, |
|
"grad_norm": 0.0791015625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5783, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.519153553988063, |
|
"grad_norm": 0.09033203125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5545, |
|
"step": 1495 |
|
}, |
|
{ |
|
"epoch": 0.5208898534997287, |
|
"grad_norm": 0.1123046875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5363, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.5226261530113945, |
|
"grad_norm": 0.08642578125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6366, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 0.5243624525230602, |
|
"grad_norm": 0.08154296875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6177, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.526098752034726, |
|
"grad_norm": 0.08349609375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5899, |
|
"step": 1515 |
|
}, |
|
{ |
|
"epoch": 0.5278350515463918, |
|
"grad_norm": 0.078125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5817, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.5295713510580575, |
|
"grad_norm": 0.08203125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.613, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 0.5313076505697233, |
|
"grad_norm": 0.07958984375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5731, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.533043950081389, |
|
"grad_norm": 0.07958984375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5645, |
|
"step": 1535 |
|
}, |
|
{ |
|
"epoch": 0.5347802495930548, |
|
"grad_norm": 0.08544921875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5638, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.5365165491047206, |
|
"grad_norm": 0.08740234375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.57, |
|
"step": 1545 |
|
}, |
|
{ |
|
"epoch": 0.5382528486163863, |
|
"grad_norm": 0.146484375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5125, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.5399891481280521, |
|
"grad_norm": 0.0849609375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6514, |
|
"step": 1555 |
|
}, |
|
{ |
|
"epoch": 0.5417254476397179, |
|
"grad_norm": 0.0830078125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6106, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.5434617471513836, |
|
"grad_norm": 0.08740234375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6017, |
|
"step": 1565 |
|
}, |
|
{ |
|
"epoch": 0.5451980466630494, |
|
"grad_norm": 0.07470703125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5875, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.5469343461747151, |
|
"grad_norm": 0.08203125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5754, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.5486706456863809, |
|
"grad_norm": 0.083984375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5631, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.5504069451980467, |
|
"grad_norm": 0.087890625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5912, |
|
"step": 1585 |
|
}, |
|
{ |
|
"epoch": 0.5521432447097124, |
|
"grad_norm": 0.0859375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5605, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.5538795442213782, |
|
"grad_norm": 0.0966796875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5789, |
|
"step": 1595 |
|
}, |
|
{ |
|
"epoch": 0.555615843733044, |
|
"grad_norm": 0.10107421875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5402, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.5573521432447097, |
|
"grad_norm": 0.09765625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6621, |
|
"step": 1605 |
|
}, |
|
{ |
|
"epoch": 0.5590884427563755, |
|
"grad_norm": 0.07861328125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5926, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.5608247422680412, |
|
"grad_norm": 0.0888671875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.597, |
|
"step": 1615 |
|
}, |
|
{ |
|
"epoch": 0.562561041779707, |
|
"grad_norm": 0.0751953125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.59, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.5642973412913728, |
|
"grad_norm": 0.0859375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.58, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.5660336408030385, |
|
"grad_norm": 0.08349609375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5573, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.5677699403147043, |
|
"grad_norm": 0.08984375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5645, |
|
"step": 1635 |
|
}, |
|
{ |
|
"epoch": 0.56950623982637, |
|
"grad_norm": 0.0830078125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5652, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.5712425393380358, |
|
"grad_norm": 0.0927734375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5656, |
|
"step": 1645 |
|
}, |
|
{ |
|
"epoch": 0.5729788388497016, |
|
"grad_norm": 0.10205078125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5316, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.5747151383613673, |
|
"grad_norm": 0.09619140625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6293, |
|
"step": 1655 |
|
}, |
|
{ |
|
"epoch": 0.5764514378730331, |
|
"grad_norm": 0.08154296875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5879, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.5781877373846989, |
|
"grad_norm": 0.08447265625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5911, |
|
"step": 1665 |
|
}, |
|
{ |
|
"epoch": 0.5799240368963646, |
|
"grad_norm": 0.07861328125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6114, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.5816603364080304, |
|
"grad_norm": 0.08740234375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5894, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 0.5833966359196961, |
|
"grad_norm": 0.0908203125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5814, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.5851329354313619, |
|
"grad_norm": 0.08154296875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5563, |
|
"step": 1685 |
|
}, |
|
{ |
|
"epoch": 0.5868692349430277, |
|
"grad_norm": 0.0869140625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5643, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.5886055344546934, |
|
"grad_norm": 0.09033203125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5682, |
|
"step": 1695 |
|
}, |
|
{ |
|
"epoch": 0.5903418339663592, |
|
"grad_norm": 0.123046875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5542, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.592078133478025, |
|
"grad_norm": 0.095703125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6263, |
|
"step": 1705 |
|
}, |
|
{ |
|
"epoch": 0.5938144329896907, |
|
"grad_norm": 0.09033203125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5931, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.5955507325013565, |
|
"grad_norm": 0.08642578125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5997, |
|
"step": 1715 |
|
}, |
|
{ |
|
"epoch": 0.5972870320130222, |
|
"grad_norm": 0.07666015625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.601, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.599023331524688, |
|
"grad_norm": 0.0849609375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5776, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 0.6007596310363538, |
|
"grad_norm": 0.08154296875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5524, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.6024959305480195, |
|
"grad_norm": 0.08251953125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.559, |
|
"step": 1735 |
|
}, |
|
{ |
|
"epoch": 0.6042322300596853, |
|
"grad_norm": 0.083984375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.566, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.6059685295713511, |
|
"grad_norm": 0.0771484375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5558, |
|
"step": 1745 |
|
}, |
|
{ |
|
"epoch": 0.6077048290830168, |
|
"grad_norm": 0.09814453125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5329, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.6094411285946826, |
|
"grad_norm": 0.09423828125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6309, |
|
"step": 1755 |
|
}, |
|
{ |
|
"epoch": 0.6111774281063483, |
|
"grad_norm": 0.0810546875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6086, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.6129137276180141, |
|
"grad_norm": 0.07861328125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5779, |
|
"step": 1765 |
|
}, |
|
{ |
|
"epoch": 0.6146500271296799, |
|
"grad_norm": 0.08203125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5789, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.6163863266413456, |
|
"grad_norm": 0.08154296875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5646, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 0.6181226261530114, |
|
"grad_norm": 0.08837890625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5693, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.6198589256646772, |
|
"grad_norm": 0.08447265625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5838, |
|
"step": 1785 |
|
}, |
|
{ |
|
"epoch": 0.6215952251763429, |
|
"grad_norm": 0.0849609375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5669, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.6233315246880087, |
|
"grad_norm": 0.08544921875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5599, |
|
"step": 1795 |
|
}, |
|
{ |
|
"epoch": 0.6250678241996744, |
|
"grad_norm": 0.08984375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.526, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.6268041237113402, |
|
"grad_norm": 0.0908203125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6346, |
|
"step": 1805 |
|
}, |
|
{ |
|
"epoch": 0.628540423223006, |
|
"grad_norm": 0.07861328125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5954, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.6302767227346717, |
|
"grad_norm": 0.09130859375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6007, |
|
"step": 1815 |
|
}, |
|
{ |
|
"epoch": 0.6320130222463375, |
|
"grad_norm": 0.08544921875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6013, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.6337493217580032, |
|
"grad_norm": 0.0888671875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.582, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 0.635485621269669, |
|
"grad_norm": 0.08984375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5971, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.6372219207813348, |
|
"grad_norm": 0.083984375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5977, |
|
"step": 1835 |
|
}, |
|
{ |
|
"epoch": 0.6389582202930005, |
|
"grad_norm": 0.08203125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5772, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.6406945198046663, |
|
"grad_norm": 0.1259765625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5406, |
|
"step": 1845 |
|
}, |
|
{ |
|
"epoch": 0.6424308193163321, |
|
"grad_norm": 0.0888671875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5287, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.6441671188279978, |
|
"grad_norm": 0.07861328125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6217, |
|
"step": 1855 |
|
}, |
|
{ |
|
"epoch": 0.6459034183396636, |
|
"grad_norm": 0.0791015625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5884, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.6476397178513293, |
|
"grad_norm": 0.078125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5774, |
|
"step": 1865 |
|
}, |
|
{ |
|
"epoch": 0.6493760173629951, |
|
"grad_norm": 0.0908203125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.579, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.6511123168746609, |
|
"grad_norm": 0.08154296875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5699, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.6528486163863266, |
|
"grad_norm": 0.0830078125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5609, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.6545849158979924, |
|
"grad_norm": 0.08544921875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5363, |
|
"step": 1885 |
|
}, |
|
{ |
|
"epoch": 0.6563212154096582, |
|
"grad_norm": 0.09228515625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5464, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.6580575149213239, |
|
"grad_norm": 0.08349609375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5455, |
|
"step": 1895 |
|
}, |
|
{ |
|
"epoch": 0.6597938144329897, |
|
"grad_norm": 0.099609375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5383, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.6615301139446554, |
|
"grad_norm": 0.08984375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6543, |
|
"step": 1905 |
|
}, |
|
{ |
|
"epoch": 0.6632664134563212, |
|
"grad_norm": 0.07763671875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5899, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.665002712967987, |
|
"grad_norm": 0.0810546875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6015, |
|
"step": 1915 |
|
}, |
|
{
"epoch": 0.6667390124796527,
"grad_norm": 0.08349609375,
"learning_rate": 0.0001,
"loss": 0.5774,
"step": 1920
},
{
"epoch": 0.6684753119913185,
"grad_norm": 0.080078125,
"learning_rate": 0.0001,
"loss": 0.5683,
"step": 1925
},
{
"epoch": 0.6702116115029843,
"grad_norm": 0.07763671875,
"learning_rate": 0.0001,
"loss": 0.5704,
"step": 1930
},
{
"epoch": 0.67194791101465,
"grad_norm": 0.08642578125,
"learning_rate": 0.0001,
"loss": 0.568,
"step": 1935
},
{
"epoch": 0.6736842105263158,
"grad_norm": 0.08544921875,
"learning_rate": 0.0001,
"loss": 0.5754,
"step": 1940
},
{
"epoch": 0.6754205100379815,
"grad_norm": 0.0830078125,
"learning_rate": 0.0001,
"loss": 0.5484,
"step": 1945
},
{
"epoch": 0.6771568095496473,
"grad_norm": 0.109375,
"learning_rate": 0.0001,
"loss": 0.5623,
"step": 1950
},
{
"epoch": 0.6788931090613131,
"grad_norm": 0.08544921875,
"learning_rate": 0.0001,
"loss": 0.6379,
"step": 1955
},
{
"epoch": 0.6806294085729788,
"grad_norm": 0.07958984375,
"learning_rate": 0.0001,
"loss": 0.5979,
"step": 1960
},
{
"epoch": 0.6823657080846446,
"grad_norm": 0.09326171875,
"learning_rate": 0.0001,
"loss": 0.5915,
"step": 1965
},
{
"epoch": 0.6841020075963103,
"grad_norm": 0.0810546875,
"learning_rate": 0.0001,
"loss": 0.5872,
"step": 1970
},
{
"epoch": 0.6858383071079761,
"grad_norm": 0.08056640625,
"learning_rate": 0.0001,
"loss": 0.5602,
"step": 1975
},
{
"epoch": 0.6875746066196419,
"grad_norm": 0.095703125,
"learning_rate": 0.0001,
"loss": 0.5689,
"step": 1980
},
{
"epoch": 0.6893109061313076,
"grad_norm": 0.0888671875,
"learning_rate": 0.0001,
"loss": 0.5802,
"step": 1985
},
{
"epoch": 0.6910472056429734,
"grad_norm": 0.09423828125,
"learning_rate": 0.0001,
"loss": 0.5512,
"step": 1990
},
{
"epoch": 0.6927835051546392,
"grad_norm": 0.08349609375,
"learning_rate": 0.0001,
"loss": 0.5593,
"step": 1995
},
{
"epoch": 0.6945198046663049,
"grad_norm": 0.10693359375,
"learning_rate": 0.0001,
"loss": 0.5352,
"step": 2000
},
{
"epoch": 0.6962561041779707,
"grad_norm": 0.0859375,
"learning_rate": 0.0001,
"loss": 0.6434,
"step": 2005
},
{
"epoch": 0.6979924036896364,
"grad_norm": 0.08837890625,
"learning_rate": 0.0001,
"loss": 0.5892,
"step": 2010
},
{
"epoch": 0.6997287032013022,
"grad_norm": 0.08447265625,
"learning_rate": 0.0001,
"loss": 0.582,
"step": 2015
},
{
"epoch": 0.701465002712968,
"grad_norm": 0.07177734375,
"learning_rate": 0.0001,
"loss": 0.5925,
"step": 2020
},
{
"epoch": 0.7032013022246337,
"grad_norm": 0.0908203125,
"learning_rate": 0.0001,
"loss": 0.5744,
"step": 2025
},
{
"epoch": 0.7049376017362995,
"grad_norm": 0.08203125,
"learning_rate": 0.0001,
"loss": 0.5867,
"step": 2030
},
{
"epoch": 0.7066739012479653,
"grad_norm": 0.08740234375,
"learning_rate": 0.0001,
"loss": 0.5724,
"step": 2035
},
{
"epoch": 0.708410200759631,
"grad_norm": 0.08544921875,
"learning_rate": 0.0001,
"loss": 0.5759,
"step": 2040
},
{
"epoch": 0.7101465002712968,
"grad_norm": 0.095703125,
"learning_rate": 0.0001,
"loss": 0.5494,
"step": 2045
},
{
"epoch": 0.7118827997829625,
"grad_norm": 0.0888671875,
"learning_rate": 0.0001,
"loss": 0.5249,
"step": 2050
},
{
"epoch": 0.7136190992946283,
"grad_norm": 0.0888671875,
"learning_rate": 0.0001,
"loss": 0.6765,
"step": 2055
},
{
"epoch": 0.7153553988062941,
"grad_norm": 0.08203125,
"learning_rate": 0.0001,
"loss": 0.6125,
"step": 2060
},
{
"epoch": 0.7170916983179598,
"grad_norm": 0.09033203125,
"learning_rate": 0.0001,
"loss": 0.5812,
"step": 2065
},
{
"epoch": 0.7188279978296256,
"grad_norm": 0.080078125,
"learning_rate": 0.0001,
"loss": 0.6022,
"step": 2070
},
{
"epoch": 0.7205642973412913,
"grad_norm": 0.08251953125,
"learning_rate": 0.0001,
"loss": 0.5859,
"step": 2075
},
{
"epoch": 0.7223005968529571,
"grad_norm": 0.083984375,
"learning_rate": 0.0001,
"loss": 0.5706,
"step": 2080
},
{
"epoch": 0.7240368963646229,
"grad_norm": 0.08154296875,
"learning_rate": 0.0001,
"loss": 0.5719,
"step": 2085
},
{
"epoch": 0.7257731958762886,
"grad_norm": 0.0791015625,
"learning_rate": 0.0001,
"loss": 0.5186,
"step": 2090
},
{
"epoch": 0.7275094953879544,
"grad_norm": 0.09375,
"learning_rate": 0.0001,
"loss": 0.5581,
"step": 2095
},
{
"epoch": 0.7292457948996202,
"grad_norm": 0.09765625,
"learning_rate": 0.0001,
"loss": 0.5278,
"step": 2100
},
{
"epoch": 0.7309820944112859,
"grad_norm": 0.08544921875,
"learning_rate": 0.0001,
"loss": 0.6465,
"step": 2105
},
{
"epoch": 0.7327183939229517,
"grad_norm": 0.08203125,
"learning_rate": 0.0001,
"loss": 0.5938,
"step": 2110
},
{
"epoch": 0.7344546934346174,
"grad_norm": 0.09228515625,
"learning_rate": 0.0001,
"loss": 0.5955,
"step": 2115
},
{
"epoch": 0.7361909929462832,
"grad_norm": 0.078125,
"learning_rate": 0.0001,
"loss": 0.5655,
"step": 2120
},
{
"epoch": 0.737927292457949,
"grad_norm": 0.0869140625,
"learning_rate": 0.0001,
"loss": 0.5625,
"step": 2125
},
{
"epoch": 0.7396635919696147,
"grad_norm": 0.07958984375,
"learning_rate": 0.0001,
"loss": 0.5637,
"step": 2130
},
{
"epoch": 0.7413998914812805,
"grad_norm": 0.0966796875,
"learning_rate": 0.0001,
"loss": 0.586,
"step": 2135
},
{
"epoch": 0.7431361909929463,
"grad_norm": 0.083984375,
"learning_rate": 0.0001,
"loss": 0.5656,
"step": 2140
},
{
"epoch": 0.744872490504612,
"grad_norm": 0.08251953125,
"learning_rate": 0.0001,
"loss": 0.5531,
"step": 2145
},
{
"epoch": 0.7466087900162778,
"grad_norm": 0.099609375,
"learning_rate": 0.0001,
"loss": 0.5338,
"step": 2150
},
{
"epoch": 0.7483450895279435,
"grad_norm": 0.08154296875,
"learning_rate": 0.0001,
"loss": 0.6171,
"step": 2155
},
{
"epoch": 0.7500813890396093,
"grad_norm": 0.09521484375,
"learning_rate": 0.0001,
"loss": 0.6082,
"step": 2160
},
{
"epoch": 0.7518176885512751,
"grad_norm": 0.08154296875,
"learning_rate": 0.0001,
"loss": 0.5923,
"step": 2165
},
{
"epoch": 0.7535539880629408,
"grad_norm": 0.07958984375,
"learning_rate": 0.0001,
"loss": 0.5825,
"step": 2170
},
{
"epoch": 0.7552902875746066,
"grad_norm": 0.08203125,
"learning_rate": 0.0001,
"loss": 0.5779,
"step": 2175
},
{
"epoch": 0.7570265870862724,
"grad_norm": 0.08349609375,
"learning_rate": 0.0001,
"loss": 0.5697,
"step": 2180
},
{
"epoch": 0.7587628865979381,
"grad_norm": 0.0810546875,
"learning_rate": 0.0001,
"loss": 0.5713,
"step": 2185
},
{
"epoch": 0.7604991861096039,
"grad_norm": 0.08203125,
"learning_rate": 0.0001,
"loss": 0.5573,
"step": 2190
},
{
"epoch": 0.7622354856212696,
"grad_norm": 0.07666015625,
"learning_rate": 0.0001,
"loss": 0.5519,
"step": 2195
},
{
"epoch": 0.7639717851329354,
"grad_norm": 0.095703125,
"learning_rate": 0.0001,
"loss": 0.5341,
"step": 2200
},
{
"epoch": 0.7657080846446012,
"grad_norm": 0.0849609375,
"learning_rate": 0.0001,
"loss": 0.6526,
"step": 2205
},
{
"epoch": 0.7674443841562669,
"grad_norm": 0.08349609375,
"learning_rate": 0.0001,
"loss": 0.599,
"step": 2210
},
{
"epoch": 0.7691806836679327,
"grad_norm": 0.08642578125,
"learning_rate": 0.0001,
"loss": 0.5808,
"step": 2215
},
{
"epoch": 0.7709169831795984,
"grad_norm": 0.080078125,
"learning_rate": 0.0001,
"loss": 0.6057,
"step": 2220
},
{
"epoch": 0.7726532826912642,
"grad_norm": 0.08056640625,
"learning_rate": 0.0001,
"loss": 0.5721,
"step": 2225
},
{
"epoch": 0.77438958220293,
"grad_norm": 0.0849609375,
"learning_rate": 0.0001,
"loss": 0.5371,
"step": 2230
},
{
"epoch": 0.7761258817145957,
"grad_norm": 0.076171875,
"learning_rate": 0.0001,
"loss": 0.5391,
"step": 2235
},
{
"epoch": 0.7778621812262615,
"grad_norm": 0.09228515625,
"learning_rate": 0.0001,
"loss": 0.6048,
"step": 2240
},
{
"epoch": 0.7795984807379273,
"grad_norm": 0.0791015625,
"learning_rate": 0.0001,
"loss": 0.559,
"step": 2245
},
{
"epoch": 0.781334780249593,
"grad_norm": 0.109375,
"learning_rate": 0.0001,
"loss": 0.5331,
"step": 2250
},
{
"epoch": 0.7830710797612588,
"grad_norm": 0.091796875,
"learning_rate": 0.0001,
"loss": 0.6412,
"step": 2255
},
{
"epoch": 0.7848073792729245,
"grad_norm": 0.083984375,
"learning_rate": 0.0001,
"loss": 0.5695,
"step": 2260
},
{
"epoch": 0.7865436787845903,
"grad_norm": 0.08056640625,
"learning_rate": 0.0001,
"loss": 0.6136,
"step": 2265
},
{
"epoch": 0.7882799782962561,
"grad_norm": 0.07861328125,
"learning_rate": 0.0001,
"loss": 0.5839,
"step": 2270
},
{
"epoch": 0.7900162778079218,
"grad_norm": 0.087890625,
"learning_rate": 0.0001,
"loss": 0.5846,
"step": 2275
},
{
"epoch": 0.7917525773195876,
"grad_norm": 0.08447265625,
"learning_rate": 0.0001,
"loss": 0.5707,
"step": 2280
},
{
"epoch": 0.7934888768312534,
"grad_norm": 0.08642578125,
"learning_rate": 0.0001,
"loss": 0.5751,
"step": 2285
},
{
"epoch": 0.7952251763429191,
"grad_norm": 0.0849609375,
"learning_rate": 0.0001,
"loss": 0.5419,
"step": 2290
},
{
"epoch": 0.7969614758545849,
"grad_norm": 0.08056640625,
"learning_rate": 0.0001,
"loss": 0.5473,
"step": 2295
},
{
"epoch": 0.7986977753662506,
"grad_norm": 0.10302734375,
"learning_rate": 0.0001,
"loss": 0.5454,
"step": 2300
},
{
"epoch": 0.8004340748779164,
"grad_norm": 0.09130859375,
"learning_rate": 0.0001,
"loss": 0.6309,
"step": 2305
},
{
"epoch": 0.8021703743895822,
"grad_norm": 0.07861328125,
"learning_rate": 0.0001,
"loss": 0.5912,
"step": 2310
},
{
"epoch": 0.8039066739012479,
"grad_norm": 0.08154296875,
"learning_rate": 0.0001,
"loss": 0.5857,
"step": 2315
},
{
"epoch": 0.8056429734129137,
"grad_norm": 0.08251953125,
"learning_rate": 0.0001,
"loss": 0.5795,
"step": 2320
},
{
"epoch": 0.8073792729245794,
"grad_norm": 0.07861328125,
"learning_rate": 0.0001,
"loss": 0.5802,
"step": 2325
},
{
"epoch": 0.8091155724362452,
"grad_norm": 0.0830078125,
"learning_rate": 0.0001,
"loss": 0.5886,
"step": 2330
},
{
"epoch": 0.810851871947911,
"grad_norm": 0.09130859375,
"learning_rate": 0.0001,
"loss": 0.5625,
"step": 2335
},
{
"epoch": 0.8125881714595767,
"grad_norm": 0.0869140625,
"learning_rate": 0.0001,
"loss": 0.5393,
"step": 2340
},
{
"epoch": 0.8143244709712425,
"grad_norm": 0.09375,
"learning_rate": 0.0001,
"loss": 0.5464,
"step": 2345
},
{
"epoch": 0.8160607704829083,
"grad_norm": 0.1025390625,
"learning_rate": 0.0001,
"loss": 0.5185,
"step": 2350
},
{
"epoch": 0.817797069994574,
"grad_norm": 0.083984375,
"learning_rate": 0.0001,
"loss": 0.6165,
"step": 2355
},
{
"epoch": 0.8195333695062398,
"grad_norm": 0.08056640625,
"learning_rate": 0.0001,
"loss": 0.5872,
"step": 2360
},
{
"epoch": 0.8212696690179055,
"grad_norm": 0.09033203125,
"learning_rate": 0.0001,
"loss": 0.6047,
"step": 2365
},
{
"epoch": 0.8230059685295713,
"grad_norm": 0.076171875,
"learning_rate": 0.0001,
"loss": 0.5829,
"step": 2370
},
{
"epoch": 0.8247422680412371,
"grad_norm": 0.0810546875,
"learning_rate": 0.0001,
"loss": 0.5789,
"step": 2375
},
{
"epoch": 0.8264785675529028,
"grad_norm": 0.08056640625,
"learning_rate": 0.0001,
"loss": 0.5606,
"step": 2380
},
{
"epoch": 0.8282148670645686,
"grad_norm": 0.0859375,
"learning_rate": 0.0001,
"loss": 0.5785,
"step": 2385
},
{
"epoch": 0.8299511665762344,
"grad_norm": 0.08740234375,
"learning_rate": 0.0001,
"loss": 0.5475,
"step": 2390
},
{
"epoch": 0.8316874660879001,
"grad_norm": 0.07861328125,
"learning_rate": 0.0001,
"loss": 0.5642,
"step": 2395
},
{
"epoch": 0.8334237655995659,
"grad_norm": 0.1044921875,
"learning_rate": 0.0001,
"loss": 0.5357,
"step": 2400
},
{
"epoch": 0.8351600651112316,
"grad_norm": 0.08837890625,
"learning_rate": 0.0001,
"loss": 0.6205,
"step": 2405
},
{
"epoch": 0.8368963646228974,
"grad_norm": 0.078125,
"learning_rate": 0.0001,
"loss": 0.5965,
"step": 2410
},
{
"epoch": 0.8386326641345632,
"grad_norm": 0.08251953125,
"learning_rate": 0.0001,
"loss": 0.5877,
"step": 2415
},
{
"epoch": 0.8403689636462289,
"grad_norm": 0.08154296875,
"learning_rate": 0.0001,
"loss": 0.5627,
"step": 2420
},
{
"epoch": 0.8421052631578947,
"grad_norm": 0.0888671875,
"learning_rate": 0.0001,
"loss": 0.5802,
"step": 2425
},
{
"epoch": 0.8438415626695605,
"grad_norm": 0.0849609375,
"learning_rate": 0.0001,
"loss": 0.5573,
"step": 2430
},
{
"epoch": 0.8455778621812262,
"grad_norm": 0.09130859375,
"learning_rate": 0.0001,
"loss": 0.5371,
"step": 2435
},
{
"epoch": 0.847314161692892,
"grad_norm": 0.08349609375,
"learning_rate": 0.0001,
"loss": 0.5425,
"step": 2440
},
{
"epoch": 0.8490504612045577,
"grad_norm": 0.09228515625,
"learning_rate": 0.0001,
"loss": 0.5507,
"step": 2445
},
{
"epoch": 0.8507867607162235,
"grad_norm": 0.09326171875,
"learning_rate": 0.0001,
"loss": 0.534,
"step": 2450
},
{
"epoch": 0.8525230602278893,
"grad_norm": 0.08642578125,
"learning_rate": 0.0001,
"loss": 0.6331,
"step": 2455
},
{
"epoch": 0.854259359739555,
"grad_norm": 0.08251953125,
"learning_rate": 0.0001,
"loss": 0.5733,
"step": 2460
},
{
"epoch": 0.8559956592512208,
"grad_norm": 0.08349609375,
"learning_rate": 0.0001,
"loss": 0.5843,
"step": 2465
},
{
"epoch": 0.8577319587628865,
"grad_norm": 0.0830078125,
"learning_rate": 0.0001,
"loss": 0.5916,
"step": 2470
},
{
"epoch": 0.8594682582745523,
"grad_norm": 0.0849609375,
"learning_rate": 0.0001,
"loss": 0.5777,
"step": 2475
},
{
"epoch": 0.8612045577862181,
"grad_norm": 0.0830078125,
"learning_rate": 0.0001,
"loss": 0.549,
"step": 2480
},
{
"epoch": 0.8629408572978838,
"grad_norm": 0.0908203125,
"learning_rate": 0.0001,
"loss": 0.5946,
"step": 2485
},
{
"epoch": 0.8646771568095496,
"grad_norm": 0.0849609375,
"learning_rate": 0.0001,
"loss": 0.5347,
"step": 2490
},
{
"epoch": 0.8664134563212154,
"grad_norm": 0.08349609375,
"learning_rate": 0.0001,
"loss": 0.5518,
"step": 2495
},
{
"epoch": 0.8681497558328811,
"grad_norm": 0.10009765625,
"learning_rate": 0.0001,
"loss": 0.5396,
"step": 2500
},
{
"epoch": 0.8698860553445469,
"grad_norm": 0.08251953125,
"learning_rate": 0.0001,
"loss": 0.62,
"step": 2505
},
{
"epoch": 0.8716223548562126,
"grad_norm": 0.07666015625,
"learning_rate": 0.0001,
"loss": 0.5824,
"step": 2510
},
{
"epoch": 0.8733586543678784,
"grad_norm": 0.08154296875,
"learning_rate": 0.0001,
"loss": 0.587,
"step": 2515
},
{
"epoch": 0.8750949538795442,
"grad_norm": 0.080078125,
"learning_rate": 0.0001,
"loss": 0.6005,
"step": 2520
},
{
"epoch": 0.8768312533912099,
"grad_norm": 0.07861328125,
"learning_rate": 0.0001,
"loss": 0.572,
"step": 2525
},
{
"epoch": 0.8785675529028757,
"grad_norm": 0.08447265625,
"learning_rate": 0.0001,
"loss": 0.5703,
"step": 2530
},
{
"epoch": 0.8803038524145415,
"grad_norm": 0.080078125,
"learning_rate": 0.0001,
"loss": 0.5616,
"step": 2535
},
{
"epoch": 0.8820401519262072,
"grad_norm": 0.08642578125,
"learning_rate": 0.0001,
"loss": 0.5447,
"step": 2540
},
{
"epoch": 0.883776451437873,
"grad_norm": 0.0859375,
"learning_rate": 0.0001,
"loss": 0.5544,
"step": 2545
},
{
"epoch": 0.8855127509495387,
"grad_norm": 0.10986328125,
"learning_rate": 0.0001,
"loss": 0.5379,
"step": 2550
},
{
"epoch": 0.8872490504612045,
"grad_norm": 0.08935546875,
"learning_rate": 0.0001,
"loss": 0.6229,
"step": 2555
},
{
"epoch": 0.8889853499728704,
"grad_norm": 0.076171875,
"learning_rate": 0.0001,
"loss": 0.6059,
"step": 2560
},
{
"epoch": 0.8907216494845361,
"grad_norm": 0.08642578125,
"learning_rate": 0.0001,
"loss": 0.5886,
"step": 2565
},
{
"epoch": 0.8924579489962019,
"grad_norm": 0.0791015625,
"learning_rate": 0.0001,
"loss": 0.568,
"step": 2570
},
{
"epoch": 0.8941942485078677,
"grad_norm": 0.0859375,
"learning_rate": 0.0001,
"loss": 0.5754,
"step": 2575
},
{
"epoch": 0.8959305480195334,
"grad_norm": 0.08349609375,
"learning_rate": 0.0001,
"loss": 0.5797,
"step": 2580
},
{
"epoch": 0.8976668475311992,
"grad_norm": 0.080078125,
"learning_rate": 0.0001,
"loss": 0.562,
"step": 2585
},
{
"epoch": 0.899403147042865,
"grad_norm": 0.08349609375,
"learning_rate": 0.0001,
"loss": 0.5673,
"step": 2590
},
{
"epoch": 0.9011394465545307,
"grad_norm": 0.0859375,
"learning_rate": 0.0001,
"loss": 0.5618,
"step": 2595
},
{
"epoch": 0.9028757460661965,
"grad_norm": 0.09423828125,
"learning_rate": 0.0001,
"loss": 0.5551,
"step": 2600
},
{
"epoch": 0.9046120455778622,
"grad_norm": 0.08544921875,
"learning_rate": 0.0001,
"loss": 0.6372,
"step": 2605
},
{
"epoch": 0.906348345089528,
"grad_norm": 0.08056640625,
"learning_rate": 0.0001,
"loss": 0.5885,
"step": 2610
},
{
"epoch": 0.9080846446011938,
"grad_norm": 0.091796875,
"learning_rate": 0.0001,
"loss": 0.5654,
"step": 2615
},
{
"epoch": 0.9098209441128595,
"grad_norm": 0.08154296875,
"learning_rate": 0.0001,
"loss": 0.5681,
"step": 2620
},
{
"epoch": 0.9115572436245253,
"grad_norm": 0.08837890625,
"learning_rate": 0.0001,
"loss": 0.589,
"step": 2625
},
{
"epoch": 0.913293543136191,
"grad_norm": 0.08203125,
"learning_rate": 0.0001,
"loss": 0.5682,
"step": 2630
},
{
"epoch": 0.9150298426478568,
"grad_norm": 0.0810546875,
"learning_rate": 0.0001,
"loss": 0.5597,
"step": 2635
},
{
"epoch": 0.9167661421595226,
"grad_norm": 0.09228515625,
"learning_rate": 0.0001,
"loss": 0.544,
"step": 2640
},
{
"epoch": 0.9185024416711883,
"grad_norm": 0.0830078125,
"learning_rate": 0.0001,
"loss": 0.5376,
"step": 2645
},
{
"epoch": 0.9202387411828541,
"grad_norm": 0.09716796875,
"learning_rate": 0.0001,
"loss": 0.5302,
"step": 2650
},
{
"epoch": 0.9219750406945199,
"grad_norm": 0.091796875,
"learning_rate": 0.0001,
"loss": 0.6424,
"step": 2655
},
{
"epoch": 0.9237113402061856,
"grad_norm": 0.0732421875,
"learning_rate": 0.0001,
"loss": 0.5671,
"step": 2660
},
{
"epoch": 0.9254476397178514,
"grad_norm": 0.08349609375,
"learning_rate": 0.0001,
"loss": 0.5829,
"step": 2665
},
{
"epoch": 0.9271839392295171,
"grad_norm": 0.07666015625,
"learning_rate": 0.0001,
"loss": 0.5677,
"step": 2670
},
{
"epoch": 0.9289202387411829,
"grad_norm": 0.099609375,
"learning_rate": 0.0001,
"loss": 0.5692,
"step": 2675
},
{
"epoch": 0.9306565382528487,
"grad_norm": 0.0859375,
"learning_rate": 0.0001,
"loss": 0.5385,
"step": 2680
},
{
"epoch": 0.9323928377645144,
"grad_norm": 0.08935546875,
"learning_rate": 0.0001,
"loss": 0.5567,
"step": 2685
},
{
"epoch": 0.9341291372761802,
"grad_norm": 0.0869140625,
"learning_rate": 0.0001,
"loss": 0.5514,
"step": 2690
},
{
"epoch": 0.935865436787846,
"grad_norm": 0.08740234375,
"learning_rate": 0.0001,
"loss": 0.5348,
"step": 2695
},
{
"epoch": 0.9376017362995117,
"grad_norm": 0.10546875,
"learning_rate": 0.0001,
"loss": 0.538,
"step": 2700
},
{
"epoch": 0.9393380358111775,
"grad_norm": 0.08544921875,
"learning_rate": 0.0001,
"loss": 0.6329,
"step": 2705
},
{
"epoch": 0.9410743353228432,
"grad_norm": 0.08154296875,
"learning_rate": 0.0001,
"loss": 0.5578,
"step": 2710
},
{
"epoch": 0.942810634834509,
"grad_norm": 0.0859375,
"learning_rate": 0.0001,
"loss": 0.6024,
"step": 2715
},
{
"epoch": 0.9445469343461748,
"grad_norm": 0.08251953125,
"learning_rate": 0.0001,
"loss": 0.5662,
"step": 2720
},
{
"epoch": 0.9462832338578405,
"grad_norm": 0.08251953125,
"learning_rate": 0.0001,
"loss": 0.576,
"step": 2725
},
{
"epoch": 0.9480195333695063,
"grad_norm": 0.083984375,
"learning_rate": 0.0001,
"loss": 0.5775,
"step": 2730
},
{
"epoch": 0.949755832881172,
"grad_norm": 0.0888671875,
"learning_rate": 0.0001,
"loss": 0.5676,
"step": 2735
},
{
"epoch": 0.9514921323928378,
"grad_norm": 0.08740234375,
"learning_rate": 0.0001,
"loss": 0.5518,
"step": 2740
},
{
"epoch": 0.9532284319045036,
"grad_norm": 0.0849609375,
"learning_rate": 0.0001,
"loss": 0.5268,
"step": 2745
},
{
"epoch": 0.9549647314161693,
"grad_norm": 0.09423828125,
"learning_rate": 0.0001,
"loss": 0.5271,
"step": 2750
},
{
"epoch": 0.9567010309278351,
"grad_norm": 0.09130859375,
"learning_rate": 0.0001,
"loss": 0.6298,
"step": 2755
},
{
"epoch": 0.9584373304395009,
"grad_norm": 0.09228515625,
"learning_rate": 0.0001,
"loss": 0.5939,
"step": 2760
},
{
"epoch": 0.9601736299511666,
"grad_norm": 0.09033203125,
"learning_rate": 0.0001,
"loss": 0.5923,
"step": 2765
},
{
"epoch": 0.9619099294628324,
"grad_norm": 0.076171875,
"learning_rate": 0.0001,
"loss": 0.5839,
"step": 2770
},
{
"epoch": 0.9636462289744981,
"grad_norm": 0.0830078125,
"learning_rate": 0.0001,
"loss": 0.5693,
"step": 2775
},
{
"epoch": 0.9653825284861639,
"grad_norm": 0.08447265625,
"learning_rate": 0.0001,
"loss": 0.5846,
"step": 2780
},
{
"epoch": 0.9671188279978297,
"grad_norm": 0.08447265625,
"learning_rate": 0.0001,
"loss": 0.5524,
"step": 2785
},
{
"epoch": 0.9688551275094954,
"grad_norm": 0.08642578125,
"learning_rate": 0.0001,
"loss": 0.564,
"step": 2790
},
{
"epoch": 0.9705914270211612,
"grad_norm": 0.09375,
"learning_rate": 0.0001,
"loss": 0.5492,
"step": 2795
},
{
"epoch": 0.972327726532827,
"grad_norm": 0.08984375,
"learning_rate": 0.0001,
"loss": 0.5216,
"step": 2800
},
{
"epoch": 0.9740640260444927,
"grad_norm": 0.08837890625,
"learning_rate": 0.0001,
"loss": 0.6251,
"step": 2805
},
{
"epoch": 0.9758003255561585,
"grad_norm": 0.0751953125,
"learning_rate": 0.0001,
"loss": 0.6015,
"step": 2810
},
{
"epoch": 0.9775366250678242,
"grad_norm": 0.0810546875,
"learning_rate": 0.0001,
"loss": 0.5966,
"step": 2815
},
{
"epoch": 0.97927292457949,
"grad_norm": 0.07763671875,
"learning_rate": 0.0001,
"loss": 0.5898,
"step": 2820
},
{
"epoch": 0.9810092240911558,
"grad_norm": 0.08154296875,
"learning_rate": 0.0001,
"loss": 0.5714,
"step": 2825
},
{
"epoch": 0.9827455236028215,
"grad_norm": 0.08935546875,
"learning_rate": 0.0001,
"loss": 0.5605,
"step": 2830
},
{
"epoch": 0.9844818231144873,
"grad_norm": 0.0791015625,
"learning_rate": 0.0001,
"loss": 0.5605,
"step": 2835
},
{
"epoch": 0.986218122626153,
"grad_norm": 0.0908203125,
"learning_rate": 0.0001,
"loss": 0.507,
"step": 2840
},
{
"epoch": 0.9879544221378188,
"grad_norm": 0.08154296875,
"learning_rate": 0.0001,
"loss": 0.5446,
"step": 2845
},
{
"epoch": 0.9896907216494846,
"grad_norm": 0.0947265625,
"learning_rate": 0.0001,
"loss": 0.556,
"step": 2850
},
{
"epoch": 0.9914270211611503,
"grad_norm": 0.09326171875,
"learning_rate": 0.0001,
"loss": 0.641,
"step": 2855
},
{
"epoch": 0.9931633206728161,
"grad_norm": 0.076171875,
"learning_rate": 0.0001,
"loss": 0.6115,
"step": 2860
},
{
"epoch": 0.9948996201844819,
"grad_norm": 0.0888671875,
"learning_rate": 0.0001,
"loss": 0.5609,
"step": 2865
},
{
"epoch": 0.9966359196961476,
"grad_norm": 0.08935546875,
"learning_rate": 0.0001,
"loss": 0.5724,
"step": 2870
},
{
"epoch": 0.9983722192078134,
"grad_norm": 0.0859375,
"learning_rate": 0.0001,
"loss": 0.5574,
"step": 2875
},
{
"epoch": 1.0001085187194791,
"grad_norm": 0.099609375,
"learning_rate": 0.0001,
"loss": 0.5421,
"step": 2880
},
{
"epoch": 1.0018448182311448,
"grad_norm": 0.09326171875,
"learning_rate": 0.0001,
"loss": 0.5708,
"step": 2885
},
{
"epoch": 1.0035811177428107,
"grad_norm": 0.07861328125,
"learning_rate": 0.0001,
"loss": 0.562,
"step": 2890
},
{
"epoch": 1.0053174172544763,
"grad_norm": 0.08251953125,
"learning_rate": 0.0001,
"loss": 0.5719,
"step": 2895
},
{
"epoch": 1.0070537167661422,
"grad_norm": 0.07958984375,
"learning_rate": 0.0001,
"loss": 0.5257,
"step": 2900
},
{
"epoch": 1.0087900162778078,
"grad_norm": 0.083984375,
"learning_rate": 0.0001,
"loss": 0.5598,
"step": 2905
},
{
"epoch": 1.0105263157894737,
"grad_norm": 0.08447265625,
"learning_rate": 0.0001,
"loss": 0.55,
"step": 2910
},
{
"epoch": 1.0122626153011394,
"grad_norm": 0.0810546875,
"learning_rate": 0.0001,
"loss": 0.5331,
"step": 2915
},
{
"epoch": 1.0139989148128052,
"grad_norm": 0.091796875,
"learning_rate": 0.0001,
"loss": 0.5241,
"step": 2920
},
{
"epoch": 1.015735214324471,
"grad_norm": 0.0869140625,
"learning_rate": 0.0001,
"loss": 0.5001,
"step": 2925
},
{
"epoch": 1.0174715138361368,
"grad_norm": 0.0947265625,
"learning_rate": 0.0001,
"loss": 0.5236,
"step": 2930
},
{
"epoch": 1.0192078133478024,
"grad_norm": 0.10302734375,
"learning_rate": 0.0001,
"loss": 0.6199,
"step": 2935
},
{
"epoch": 1.0209441128594683,
"grad_norm": 0.08056640625,
"learning_rate": 0.0001,
"loss": 0.5554,
"step": 2940
},
{
"epoch": 1.022680412371134,
"grad_norm": 0.0908203125,
"learning_rate": 0.0001,
"loss": 0.5494,
"step": 2945
},
{
"epoch": 1.0244167118827998,
"grad_norm": 0.0849609375,
"learning_rate": 0.0001,
"loss": 0.5589,
"step": 2950
},
{
"epoch": 1.0261530113944655,
"grad_norm": 0.08740234375,
"learning_rate": 0.0001,
"loss": 0.5521,
"step": 2955
},
{
"epoch": 1.0278893109061313,
"grad_norm": 0.0888671875,
"learning_rate": 0.0001,
"loss": 0.5309,
"step": 2960
},
{
"epoch": 1.029625610417797,
"grad_norm": 0.0888671875,
"learning_rate": 0.0001,
"loss": 0.5164,
"step": 2965
},
{
"epoch": 1.0313619099294629,
"grad_norm": 0.0947265625,
"learning_rate": 0.0001,
"loss": 0.5358,
"step": 2970
},
{
"epoch": 1.0330982094411285,
"grad_norm": 0.1103515625,
"learning_rate": 0.0001,
"loss": 0.535,
"step": 2975
},
{
"epoch": 1.0348345089527944,
"grad_norm": 0.099609375,
"learning_rate": 0.0001,
"loss": 0.5314,
"step": 2980
},
{
"epoch": 1.03657080846446,
"grad_norm": 0.10302734375,
"learning_rate": 0.0001,
"loss": 0.5795,
"step": 2985
},
{
"epoch": 1.038307107976126,
"grad_norm": 0.087890625,
"learning_rate": 0.0001,
"loss": 0.5741,
"step": 2990
},
{
"epoch": 1.0400434074877916,
"grad_norm": 0.08935546875,
"learning_rate": 0.0001,
"loss": 0.6019,
"step": 2995
},
{
"epoch": 1.0417797069994574,
"grad_norm": 0.08935546875,
"learning_rate": 0.0001,
"loss": 0.5506,
"step": 3000
},
{
"epoch": 1.043516006511123,
"grad_norm": 0.0888671875,
"learning_rate": 0.0001,
"loss": 0.5319,
"step": 3005
},
{
"epoch": 1.045252306022789,
"grad_norm": 0.087890625,
"learning_rate": 0.0001,
"loss": 0.5308,
"step": 3010
},
{
"epoch": 1.0469886055344546,
"grad_norm": 0.0869140625,
"learning_rate": 0.0001,
"loss": 0.5266,
"step": 3015
},
{
"epoch": 1.0487249050461205,
"grad_norm": 0.0927734375,
"learning_rate": 0.0001,
"loss": 0.5151,
"step": 3020
},
{
"epoch": 1.0504612045577861,
"grad_norm": 0.0859375,
"learning_rate": 0.0001,
"loss": 0.5163,
"step": 3025
},
{
"epoch": 1.052197504069452,
"grad_norm": 0.099609375,
"learning_rate": 0.0001,
"loss": 0.504,
"step": 3030
},
{
"epoch": 1.0539338035811177,
"grad_norm": 0.09716796875,
"learning_rate": 0.0001,
"loss": 0.6077,
"step": 3035
},
{
"epoch": 1.0556701030927835,
"grad_norm": 0.0927734375,
"learning_rate": 0.0001,
"loss": 0.5518,
"step": 3040
},
{
"epoch": 1.0574064026044492,
"grad_norm": 0.08740234375,
"learning_rate": 0.0001,
"loss": 0.5758,
"step": 3045
},
{
"epoch": 1.059142702116115,
"grad_norm": 0.08447265625,
"learning_rate": 0.0001,
"loss": 0.5648,
"step": 3050
},
{
"epoch": 1.0608790016277807,
"grad_norm": 0.1376953125,
"learning_rate": 0.0001,
"loss": 0.544,
"step": 3055
},
{
"epoch": 1.0626153011394466,
"grad_norm": 0.0888671875,
"learning_rate": 0.0001,
"loss": 0.5354,
"step": 3060
},
{
"epoch": 1.0643516006511122,
"grad_norm": 0.09423828125,
"learning_rate": 0.0001,
"loss": 0.534,
"step": 3065
},
{
"epoch": 1.066087900162778,
"grad_norm": 0.09423828125,
"learning_rate": 0.0001,
"loss": 0.5356,
"step": 3070
},
{
"epoch": 1.0678241996744438,
"grad_norm": 0.0986328125,
"learning_rate": 0.0001,
"loss": 0.5224,
"step": 3075
},
{
"epoch": 1.0695604991861096,
"grad_norm": 0.09521484375,
"learning_rate": 0.0001,
"loss": 0.5114,
"step": 3080
},
{
"epoch": 1.0712967986977753,
"grad_norm": 0.10205078125,
"learning_rate": 0.0001,
"loss": 0.5914,
"step": 3085
},
{
"epoch": 1.0730330982094411,
"grad_norm": 0.189453125,
"learning_rate": 0.0001,
"loss": 0.5733,
"step": 3090
},
{
"epoch": 1.0747693977211068,
"grad_norm": 0.09521484375,
"learning_rate": 0.0001,
"loss": 0.57,
"step": 3095
},
{
"epoch": 1.0765056972327727,
"grad_norm": 0.0869140625,
"learning_rate": 0.0001,
"loss": 0.5571,
"step": 3100
},
{
"epoch": 1.0782419967444383,
"grad_norm": 0.08642578125,
"learning_rate": 0.0001,
"loss": 0.5568,
"step": 3105
},
{
"epoch": 1.0799782962561042,
"grad_norm": 0.09033203125,
"learning_rate": 0.0001,
"loss": 0.5211,
"step": 3110
},
{
"epoch": 1.0817145957677698,
"grad_norm": 0.09228515625,
"learning_rate": 0.0001,
"loss": 0.5081,
"step": 3115
},
{
"epoch": 1.0834508952794357,
"grad_norm": 0.11865234375,
"learning_rate": 0.0001,
"loss": 0.523,
"step": 3120
},
{
"epoch": 1.0851871947911014,
"grad_norm": 0.1064453125,
"learning_rate": 0.0001,
"loss": 0.5121,
"step": 3125
},
{
"epoch": 1.0869234943027672,
"grad_norm": 0.0986328125,
"learning_rate": 0.0001,
"loss": 0.5159,
"step": 3130
},
{
"epoch": 1.088659793814433,
"grad_norm": 0.10400390625,
"learning_rate": 0.0001,
"loss": 0.6119,
"step": 3135
},
{
"epoch": 1.0903960933260988,
"grad_norm": 0.08447265625,
"learning_rate": 0.0001,
"loss": 0.5505,
"step": 3140
},
{
"epoch": 1.0921323928377644,
"grad_norm": 0.09912109375,
"learning_rate": 0.0001,
"loss": 0.5409,
"step": 3145
},
{
"epoch": 1.0938686923494303,
"grad_norm": 0.087890625,
"learning_rate": 0.0001,
"loss": 0.5636,
"step": 3150
},
{
"epoch": 1.095604991861096,
"grad_norm": 0.08984375,
"learning_rate": 0.0001,
"loss": 0.5607,
"step": 3155
},
{
"epoch": 1.0973412913727618,
"grad_norm": 0.09619140625,
"learning_rate": 0.0001,
"loss": 0.529,
"step": 3160
},
{
"epoch": 1.0990775908844275,
"grad_norm": 0.0947265625,
"learning_rate": 0.0001,
"loss": 0.5505,
"step": 3165
},
{
"epoch": 1.1008138903960933,
"grad_norm": 0.09130859375,
"learning_rate": 0.0001,
"loss": 0.4906,
"step": 3170
},
{
"epoch": 1.102550189907759,
"grad_norm": 0.1064453125,
"learning_rate": 0.0001,
"loss": 0.5309,
"step": 3175
},
{
"epoch": 1.1042864894194249,
"grad_norm": 0.09912109375,
"learning_rate": 0.0001,
"loss": 0.5048,
"step": 3180
},
{
"epoch": 1.1060227889310905,
"grad_norm": 0.095703125,
"learning_rate": 0.0001,
"loss": 0.5913,
"step": 3185
},
{
"epoch": 1.1077590884427564,
"grad_norm": 0.08740234375,
"learning_rate": 0.0001,
"loss": 0.5599,
"step": 3190
},
{
"epoch": 1.109495387954422,
"grad_norm": 0.083984375,
"learning_rate": 0.0001,
"loss": 0.5512,
"step": 3195
},
{
"epoch": 1.111231687466088,
"grad_norm": 0.08837890625,
"learning_rate": 0.0001,
"loss": 0.5476,
"step": 3200
},
{
"epoch": 1.1129679869777536,
"grad_norm": 0.09130859375,
"learning_rate": 0.0001,
"loss": 0.5307,
"step": 3205
},
{
"epoch": 1.1147042864894194,
"grad_norm": 0.083984375,
"learning_rate": 0.0001,
"loss": 0.5333,
"step": 3210
},
{
"epoch": 1.116440586001085,
"grad_norm": 0.095703125,
"learning_rate": 0.0001,
"loss": 0.5428,
"step": 3215
},
{
"epoch": 1.118176885512751,
"grad_norm": 0.09765625,
"learning_rate": 0.0001,
"loss": 0.5391,
"step": 3220
},
{
"epoch": 1.1199131850244166,
"grad_norm": 0.09130859375,
"learning_rate": 0.0001,
"loss": 0.4993,
"step": 3225
},
{
"epoch": 1.1216494845360825,
"grad_norm": 0.1005859375,
"learning_rate": 0.0001,
"loss": 0.5149,
"step": 3230
},
{
"epoch": 1.1233857840477481,
"grad_norm": 0.09326171875,
"learning_rate": 0.0001,
"loss": 0.5878,
"step": 3235
},
{
"epoch": 1.125122083559414,
"grad_norm": 0.08837890625,
"learning_rate": 0.0001,
"loss": 0.5521,
"step": 3240
},
{
"epoch": 1.1268583830710797,
"grad_norm": 0.10302734375,
"learning_rate": 0.0001,
"loss": 0.565,
"step": 3245
},
{
"epoch": 1.1285946825827455,
"grad_norm": 0.08837890625,
"learning_rate": 0.0001,
"loss": 0.5652,
"step": 3250
},
{
"epoch": 1.1303309820944114,
"grad_norm": 0.09228515625,
"learning_rate": 0.0001,
"loss": 0.5465,
"step": 3255
},
{
"epoch": 1.132067281606077,
"grad_norm": 0.09033203125,
"learning_rate": 0.0001,
"loss": 0.5407,
"step": 3260
},
{
"epoch": 1.1338035811177427,
"grad_norm": 0.0966796875,
"learning_rate": 0.0001,
"loss": 0.5105,
"step": 3265
},
{
"epoch": 1.1355398806294086,
"grad_norm": 0.1064453125,
"learning_rate": 0.0001,
"loss": 0.5218,
"step": 3270
},
{
"epoch": 1.1372761801410745,
"grad_norm": 0.10205078125,
"learning_rate": 0.0001,
"loss": 0.5383,
"step": 3275
},
{
"epoch": 1.13901247965274,
"grad_norm": 0.0966796875,
"learning_rate": 0.0001,
"loss": 0.5039,
"step": 3280
},
{
"epoch": 1.1407487791644058,
"grad_norm": 0.095703125,
"learning_rate": 0.0001,
"loss": 0.5962,
"step": 3285
},
{
"epoch": 1.1424850786760716,
"grad_norm": 0.0888671875,
"learning_rate": 0.0001,
"loss": 0.5774,
"step": 3290
},
{
"epoch": 1.1442213781877375,
"grad_norm": 0.0927734375,
"learning_rate": 0.0001,
"loss": 0.5479,
"step": 3295
},
{
"epoch": 1.1459576776994032,
"grad_norm": 0.09375,
"learning_rate": 0.0001,
"loss": 0.567,
"step": 3300
},
{
"epoch": 1.1476939772110688,
"grad_norm": 0.091796875,
"learning_rate": 0.0001,
"loss": 0.5492,
"step": 3305
},
{
"epoch": 1.1494302767227347,
"grad_norm": 0.0888671875,
"learning_rate": 0.0001,
"loss": 0.5227,
"step": 3310
},
{
"epoch": 1.1511665762344006,
"grad_norm": 0.1044921875,
"learning_rate": 0.0001,
"loss": 0.5537,
"step": 3315
},
{
"epoch": 1.1529028757460662,
"grad_norm": 0.0927734375,
"learning_rate": 0.0001,
"loss": 0.538,
"step": 3320
},
{
"epoch": 1.1546391752577319,
"grad_norm": 0.09130859375,
"learning_rate": 0.0001,
"loss": 0.5197,
"step": 3325
},
{
"epoch": 1.1563754747693977,
"grad_norm": 0.10205078125,
"learning_rate": 0.0001,
"loss": 0.5175,
"step": 3330
},
{
"epoch": 1.1581117742810636,
"grad_norm": 0.09814453125,
"learning_rate": 0.0001,
"loss": 0.5945,
"step": 3335
},
{
"epoch": 1.1598480737927293,
"grad_norm": 0.0869140625,
"learning_rate": 0.0001,
"loss": 0.5821,
"step": 3340
},
{
"epoch": 1.161584373304395,
"grad_norm": 0.0888671875,
"learning_rate": 0.0001,
"loss": 0.5615,
"step": 3345
},
{
"epoch": 1.1633206728160608,
"grad_norm": 0.087890625,
"learning_rate": 0.0001,
"loss": 0.5686,
"step": 3350
},
{
"epoch": 1.1650569723277266,
"grad_norm": 0.095703125,
"learning_rate": 0.0001,
"loss": 0.5424,
"step": 3355
},
{
"epoch": 1.1667932718393923,
"grad_norm": 0.099609375,
"learning_rate": 0.0001,
"loss": 0.5342,
"step": 3360
},
{
"epoch": 1.168529571351058,
"grad_norm": 0.1044921875,
"learning_rate": 0.0001,
"loss": 0.5311,
"step": 3365
},
{
"epoch": 1.1702658708627238,
"grad_norm": 0.1025390625,
"learning_rate": 0.0001,
"loss": 0.5232,
"step": 3370
},
{
"epoch": 1.1720021703743897,
"grad_norm": 0.10302734375,
"learning_rate": 0.0001,
"loss": 0.5082,
"step": 3375
},
{
"epoch": 1.1737384698860553,
"grad_norm": 0.10498046875,
"learning_rate": 0.0001,
"loss": 0.4812,
"step": 3380
},
{
"epoch": 1.175474769397721,
"grad_norm": 0.0966796875,
"learning_rate": 0.0001,
"loss": 0.6006,
"step": 3385
},
{
"epoch": 1.1772110689093869,
"grad_norm": 0.08642578125,
"learning_rate": 0.0001,
"loss": 0.5666,
"step": 3390
},
{
"epoch": 1.1789473684210527,
"grad_norm": 0.0888671875,
"learning_rate": 0.0001,
"loss": 0.5628,
"step": 3395
},
{
"epoch": 1.1806836679327184,
"grad_norm": 0.09423828125,
"learning_rate": 0.0001,
"loss": 0.5517,
"step": 3400
},
{
"epoch": 1.182419967444384,
"grad_norm": 0.09765625,
"learning_rate": 0.0001,
"loss": 0.5596,
"step": 3405
},
{
"epoch": 1.18415626695605,
"grad_norm": 0.09326171875,
"learning_rate": 0.0001,
"loss": 0.5334,
"step": 3410
},
{
"epoch": 1.1858925664677158,
"grad_norm": 0.09912109375,
"learning_rate": 0.0001,
"loss": 0.5534,
"step": 3415
},
{
"epoch": 1.1876288659793814,
"grad_norm": 0.0966796875,
"learning_rate": 0.0001,
"loss": 0.5223,
"step": 3420
},
{
"epoch": 1.189365165491047,
"grad_norm": 0.107421875,
"learning_rate": 0.0001,
"loss": 0.5088,
"step": 3425
},
{
"epoch": 1.191101465002713,
"grad_norm": 0.099609375,
"learning_rate": 0.0001,
"loss": 0.5168,
"step": 3430
},
{
"epoch": 1.1928377645143788,
"grad_norm": 0.099609375,
"learning_rate": 0.0001,
"loss": 0.6175,
"step": 3435
},
{
"epoch": 1.1945740640260445,
"grad_norm": 0.09130859375,
"learning_rate": 0.0001,
"loss": 0.5669,
"step": 3440
},
{
"epoch": 1.1963103635377101,
"grad_norm": 0.08984375,
"learning_rate": 0.0001,
"loss": 0.5467,
"step": 3445
},
{
"epoch": 1.198046663049376,
"grad_norm": 0.0830078125,
"learning_rate": 0.0001,
"loss": 0.5485,
"step": 3450
},
{
"epoch": 1.1997829625610419,
"grad_norm": 0.0859375,
"learning_rate": 0.0001,
"loss": 0.5267,
"step": 3455
},
{
"epoch": 1.2015192620727075,
"grad_norm": 0.10009765625,
"learning_rate": 0.0001,
"loss": 0.5379,
"step": 3460
},
{
"epoch": 1.2032555615843732,
"grad_norm": 0.10009765625,
"learning_rate": 0.0001,
"loss": 0.5153,
"step": 3465
},
{
"epoch": 1.204991861096039,
"grad_norm": 0.09619140625,
"learning_rate": 0.0001,
"loss": 0.5399,
"step": 3470
},
{
"epoch": 1.206728160607705,
"grad_norm": 0.1044921875,
"learning_rate": 0.0001,
"loss": 0.5223,
"step": 3475
},
{
"epoch": 1.2084644601193706,
"grad_norm": 0.10302734375,
"learning_rate": 0.0001,
"loss": 0.5079,
"step": 3480
},
{
"epoch": 1.2102007596310362,
"grad_norm": 0.10595703125,
"learning_rate": 0.0001,
"loss": 0.6126,
"step": 3485
},
{
"epoch": 1.2119370591427021,
"grad_norm": 0.08642578125,
"learning_rate": 0.0001,
"loss": 0.5664,
"step": 3490
},
{
"epoch": 1.213673358654368,
"grad_norm": 0.09521484375,
"learning_rate": 0.0001,
"loss": 0.5839,
"step": 3495
},
{
"epoch": 1.2154096581660336,
"grad_norm": 0.10400390625,
"learning_rate": 0.0001,
"loss": 0.5499,
"step": 3500
},
{
"epoch": 1.2171459576776993,
"grad_norm": 0.095703125,
"learning_rate": 0.0001,
"loss": 0.5512,
"step": 3505
},
{
"epoch": 1.2188822571893652,
"grad_norm": 0.10400390625,
"learning_rate": 0.0001,
"loss": 0.5418,
"step": 3510
},
{
"epoch": 1.220618556701031,
"grad_norm": 0.10400390625,
"learning_rate": 0.0001,
"loss": 0.5363,
"step": 3515
},
{
"epoch": 1.2223548562126967,
"grad_norm": 0.09423828125,
"learning_rate": 0.0001,
"loss": 0.4974,
"step": 3520
},
{
"epoch": 1.2240911557243623,
"grad_norm": 0.10546875,
"learning_rate": 0.0001,
"loss": 0.5168,
"step": 3525
},
{
"epoch": 1.2258274552360282,
"grad_norm": 0.1123046875,
"learning_rate": 0.0001,
"loss": 0.5311,
"step": 3530
},
{
"epoch": 1.227563754747694,
"grad_norm": 0.0947265625,
"learning_rate": 0.0001,
"loss": 0.5978,
"step": 3535
},
{
"epoch": 1.2293000542593597,
"grad_norm": 0.08740234375,
"learning_rate": 0.0001,
"loss": 0.5686,
"step": 3540
},
{
"epoch": 1.2310363537710254,
"grad_norm": 0.1025390625,
"learning_rate": 0.0001,
"loss": 0.57,
"step": 3545
},
{
"epoch": 1.2327726532826913,
"grad_norm": 0.09375,
"learning_rate": 0.0001,
"loss": 0.5602,
"step": 3550
},
{
"epoch": 1.2345089527943571,
"grad_norm": 0.09716796875,
"learning_rate": 0.0001,
"loss": 0.5633,
"step": 3555
},
{
"epoch": 1.2362452523060228,
"grad_norm": 0.09814453125,
"learning_rate": 0.0001,
"loss": 0.5452,
"step": 3560
},
{
"epoch": 1.2379815518176884,
"grad_norm": 0.09521484375,
"learning_rate": 0.0001,
"loss": 0.5294,
"step": 3565
},
{
"epoch": 1.2397178513293543,
"grad_norm": 0.10009765625,
"learning_rate": 0.0001,
"loss": 0.5362,
"step": 3570
},
{
"epoch": 1.2414541508410202,
"grad_norm": 0.2578125,
"learning_rate": 0.0001,
"loss": 0.5045,
"step": 3575
},
{
"epoch": 1.2431904503526858,
"grad_norm": 0.1005859375,
"learning_rate": 0.0001,
"loss": 0.5148,
"step": 3580
},
{
"epoch": 1.2449267498643515,
"grad_norm": 0.1171875,
"learning_rate": 0.0001,
"loss": 0.6032,
"step": 3585
},
{
"epoch": 1.2466630493760174,
"grad_norm": 0.087890625,
"learning_rate": 0.0001,
"loss": 0.5733,
"step": 3590
},
{
"epoch": 1.2483993488876832,
"grad_norm": 0.1318359375,
"learning_rate": 0.0001,
"loss": 0.5505,
"step": 3595
},
{
"epoch": 1.2501356483993489,
"grad_norm": 0.087890625,
"learning_rate": 0.0001,
"loss": 0.5486,
"step": 3600
},
{
"epoch": 1.2518719479110145,
"grad_norm": 0.09765625,
"learning_rate": 0.0001,
"loss": 0.5346,
"step": 3605
},
{
"epoch": 1.2536082474226804,
"grad_norm": 0.09423828125,
"learning_rate": 0.0001,
"loss": 0.5737,
"step": 3610
},
{
"epoch": 1.2553445469343463,
"grad_norm": 0.09423828125,
"learning_rate": 0.0001,
"loss": 0.5388,
"step": 3615
},
{
"epoch": 1.257080846446012,
"grad_norm": 0.08935546875,
"learning_rate": 0.0001,
"loss": 0.5117,
"step": 3620
},
{
"epoch": 1.2588171459576776,
"grad_norm": 0.1044921875,
"learning_rate": 0.0001,
"loss": 0.526,
"step": 3625
},
{
"epoch": 1.2605534454693434,
"grad_norm": 0.1015625,
"learning_rate": 0.0001,
"loss": 0.5235,
"step": 3630
},
{
"epoch": 1.2622897449810093,
"grad_norm": 0.103515625,
"learning_rate": 0.0001,
"loss": 0.5946,
"step": 3635
},
{
"epoch": 1.264026044492675,
"grad_norm": 0.0888671875,
"learning_rate": 0.0001,
"loss": 0.5657,
"step": 3640
},
{
"epoch": 1.2657623440043406,
"grad_norm": 0.09619140625,
"learning_rate": 0.0001,
"loss": 0.559,
"step": 3645
},
{
"epoch": 1.2674986435160065,
"grad_norm": 0.0927734375,
"learning_rate": 0.0001,
"loss": 0.5418,
"step": 3650
},
{
"epoch": 1.2692349430276724,
"grad_norm": 0.0986328125,
"learning_rate": 0.0001,
"loss": 0.5389,
"step": 3655
},
{
"epoch": 1.270971242539338,
"grad_norm": 0.09326171875,
"learning_rate": 0.0001,
"loss": 0.5283,
"step": 3660
},
{
"epoch": 1.2727075420510037,
"grad_norm": 0.0986328125,
"learning_rate": 0.0001,
"loss": 0.5323,
"step": 3665
},
{
"epoch": 1.2744438415626695,
"grad_norm": 0.1025390625,
"learning_rate": 0.0001,
"loss": 0.5311,
"step": 3670
},
{
"epoch": 1.2761801410743354,
"grad_norm": 0.09716796875,
"learning_rate": 0.0001,
"loss": 0.5259,
"step": 3675
},
{
"epoch": 1.277916440586001,
"grad_norm": 0.11328125,
"learning_rate": 0.0001,
"loss": 0.4963,
"step": 3680
},
{
"epoch": 1.2796527400976667,
"grad_norm": 0.1064453125,
"learning_rate": 0.0001,
"loss": 0.5846,
"step": 3685
},
{
"epoch": 1.2813890396093326,
"grad_norm": 0.087890625,
"learning_rate": 0.0001,
"loss": 0.5784,
"step": 3690
},
{
"epoch": 1.2831253391209985,
"grad_norm": 0.08984375,
"learning_rate": 0.0001,
"loss": 0.5807,
"step": 3695
},
{
"epoch": 1.2848616386326641,
"grad_norm": 0.095703125,
"learning_rate": 0.0001,
"loss": 0.575,
"step": 3700
},
{
"epoch": 1.2865979381443298,
"grad_norm": 0.1123046875,
"learning_rate": 0.0001,
"loss": 0.5546,
"step": 3705
},
{
"epoch": 1.2883342376559956,
"grad_norm": 0.09228515625,
"learning_rate": 0.0001,
"loss": 0.5439,
"step": 3710
},
{
"epoch": 1.2900705371676615,
"grad_norm": 0.095703125,
"learning_rate": 0.0001,
"loss": 0.5331,
"step": 3715
},
{
"epoch": 1.2918068366793272,
"grad_norm": 0.0986328125,
"learning_rate": 0.0001,
"loss": 0.5203,
"step": 3720
},
{
"epoch": 1.2935431361909928,
"grad_norm": 0.10302734375,
"learning_rate": 0.0001,
"loss": 0.5172,
"step": 3725
},
{
"epoch": 1.2952794357026587,
"grad_norm": 0.10498046875,
"learning_rate": 0.0001,
"loss": 0.5059,
"step": 3730
},
{
"epoch": 1.2970157352143246,
"grad_norm": 0.10302734375,
"learning_rate": 0.0001,
"loss": 0.6053,
"step": 3735
},
{
"epoch": 1.2987520347259902,
"grad_norm": 0.083984375,
"learning_rate": 0.0001,
"loss": 0.5408,
"step": 3740
},
{
"epoch": 1.3004883342376559,
"grad_norm": 0.1025390625,
"learning_rate": 0.0001,
"loss": 0.5653,
"step": 3745
},
{
"epoch": 1.3022246337493217,
"grad_norm": 0.09716796875,
"learning_rate": 0.0001,
"loss": 0.5496,
"step": 3750
},
{
"epoch": 1.3039609332609876,
"grad_norm": 0.09619140625,
"learning_rate": 0.0001,
"loss": 0.5394,
"step": 3755
},
{
"epoch": 1.3056972327726533,
"grad_norm": 0.0927734375,
"learning_rate": 0.0001,
"loss": 0.5277,
"step": 3760
},
{
"epoch": 1.307433532284319,
"grad_norm": 0.142578125,
"learning_rate": 0.0001,
"loss": 0.5365,
"step": 3765
},
{
"epoch": 1.3091698317959848,
"grad_norm": 0.10205078125,
"learning_rate": 0.0001,
"loss": 0.5384,
"step": 3770
},
{
"epoch": 1.3109061313076507,
"grad_norm": 0.103515625,
"learning_rate": 0.0001,
"loss": 0.515,
"step": 3775
},
{
"epoch": 1.3126424308193163,
"grad_norm": 0.10595703125,
"learning_rate": 0.0001,
"loss": 0.5242,
"step": 3780
},
{
"epoch": 1.314378730330982,
"grad_norm": 0.1142578125,
"learning_rate": 0.0001,
"loss": 0.6133,
"step": 3785
},
{
"epoch": 1.3161150298426478,
"grad_norm": 0.09228515625,
"learning_rate": 0.0001,
"loss": 0.5875,
"step": 3790
},
{
"epoch": 1.3178513293543137,
"grad_norm": 0.0888671875,
"learning_rate": 0.0001,
"loss": 0.5758,
"step": 3795
},
{
"epoch": 1.3195876288659794,
"grad_norm": 0.0947265625,
"learning_rate": 0.0001,
"loss": 0.5549,
"step": 3800
},
{
"epoch": 1.321323928377645,
"grad_norm": 0.1162109375,
"learning_rate": 0.0001,
"loss": 0.5265,
"step": 3805
},
{
"epoch": 1.3230602278893109,
"grad_norm": 0.12060546875,
"learning_rate": 0.0001,
"loss": 0.5568,
"step": 3810
},
{
"epoch": 1.3247965274009768,
"grad_norm": 0.10986328125,
"learning_rate": 0.0001,
"loss": 0.533,
"step": 3815
},
{
"epoch": 1.3265328269126424,
"grad_norm": 0.1044921875,
"learning_rate": 0.0001,
"loss": 0.523,
"step": 3820
},
{
"epoch": 1.328269126424308,
"grad_norm": 0.10302734375,
"learning_rate": 0.0001,
"loss": 0.5199,
"step": 3825
},
{
"epoch": 1.330005425935974,
"grad_norm": 0.10302734375,
"learning_rate": 0.0001,
"loss": 0.4952,
"step": 3830
},
{
"epoch": 1.3317417254476398,
"grad_norm": 0.1044921875,
"learning_rate": 0.0001,
"loss": 0.5991,
"step": 3835
},
{
"epoch": 1.3334780249593055,
"grad_norm": 0.1044921875,
"learning_rate": 0.0001,
"loss": 0.5884,
"step": 3840
},
{
"epoch": 1.3352143244709713,
"grad_norm": 0.08935546875,
"learning_rate": 0.0001,
"loss": 0.585,
"step": 3845
},
{
"epoch": 1.336950623982637,
"grad_norm": 0.0966796875,
"learning_rate": 0.0001,
"loss": 0.5728,
"step": 3850
},
{
"epoch": 1.3386869234943028,
"grad_norm": 0.09765625,
"learning_rate": 0.0001,
"loss": 0.5532,
"step": 3855
},
{
"epoch": 1.3404232230059685,
"grad_norm": 0.1005859375,
"learning_rate": 0.0001,
"loss": 0.5284,
"step": 3860
},
{
"epoch": 1.3421595225176344,
"grad_norm": 0.10107421875,
"learning_rate": 0.0001,
"loss": 0.5246,
"step": 3865
},
{
"epoch": 1.3438958220293,
"grad_norm": 0.0986328125,
"learning_rate": 0.0001,
"loss": 0.5144,
"step": 3870
},
{
"epoch": 1.345632121540966,
"grad_norm": 0.11767578125,
"learning_rate": 0.0001,
"loss": 0.5251,
"step": 3875
},
{
"epoch": 1.3473684210526315,
"grad_norm": 0.125,
"learning_rate": 0.0001,
"loss": 0.5032,
"step": 3880
},
{
"epoch": 1.3491047205642974,
"grad_norm": 0.11474609375,
"learning_rate": 0.0001,
"loss": 0.6069,
"step": 3885
},
{
"epoch": 1.350841020075963,
"grad_norm": 0.08984375,
"learning_rate": 0.0001,
"loss": 0.562,
"step": 3890
},
{
"epoch": 1.352577319587629,
"grad_norm": 0.09326171875,
"learning_rate": 0.0001,
"loss": 0.5471,
"step": 3895
},
{
"epoch": 1.3543136190992946,
"grad_norm": 0.09521484375,
"learning_rate": 0.0001,
"loss": 0.5352,
"step": 3900
},
{
"epoch": 1.3560499186109605,
"grad_norm": 0.09619140625,
"learning_rate": 0.0001,
"loss": 0.5347,
"step": 3905
},
{
"epoch": 1.3577862181226261,
"grad_norm": 0.09912109375,
"learning_rate": 0.0001,
"loss": 0.5511,
"step": 3910
},
{
"epoch": 1.359522517634292,
"grad_norm": 0.095703125,
"learning_rate": 0.0001,
"loss": 0.5502,
"step": 3915
},
{
"epoch": 1.3612588171459576,
"grad_norm": 0.0986328125,
"learning_rate": 0.0001,
"loss": 0.521,
"step": 3920
},
{
"epoch": 1.3629951166576235,
"grad_norm": 0.1328125,
"learning_rate": 0.0001,
"loss": 0.5171,
"step": 3925
},
{
"epoch": 1.3647314161692892,
"grad_norm": 0.10498046875,
"learning_rate": 0.0001,
"loss": 0.5107,
"step": 3930
},
{
"epoch": 1.366467715680955,
"grad_norm": 0.0947265625,
"learning_rate": 0.0001,
"loss": 0.584,
"step": 3935
},
{
"epoch": 1.3682040151926207,
"grad_norm": 0.0888671875,
"learning_rate": 0.0001,
"loss": 0.5586,
"step": 3940
},
{
"epoch": 1.3699403147042866,
"grad_norm": 0.09765625,
"learning_rate": 0.0001,
"loss": 0.5453,
"step": 3945
},
{
"epoch": 1.3716766142159522,
"grad_norm": 0.10302734375,
"learning_rate": 0.0001,
"loss": 0.5484,
"step": 3950
},
{
"epoch": 1.373412913727618,
"grad_norm": 0.1005859375,
"learning_rate": 0.0001,
"loss": 0.5584,
"step": 3955
},
{
"epoch": 1.3751492132392837,
"grad_norm": 0.0947265625,
"learning_rate": 0.0001,
"loss": 0.5369,
"step": 3960
},
{
"epoch": 1.3768855127509496,
"grad_norm": 0.234375,
"learning_rate": 0.0001,
"loss": 0.5417,
"step": 3965
},
{
"epoch": 1.3786218122626153,
"grad_norm": 0.09619140625,
"learning_rate": 0.0001,
"loss": 0.5264,
"step": 3970
},
{
"epoch": 1.3803581117742811,
"grad_norm": 0.10107421875,
"learning_rate": 0.0001,
"loss": 0.5208,
"step": 3975
},
{
"epoch": 1.3820944112859468,
"grad_norm": 0.10400390625,
"learning_rate": 0.0001,
"loss": 0.5033,
"step": 3980
},
{
"epoch": 1.3838307107976127,
"grad_norm": 0.1025390625,
"learning_rate": 0.0001,
"loss": 0.6101,
"step": 3985
},
{
"epoch": 1.3855670103092783,
"grad_norm": 0.0947265625,
"learning_rate": 0.0001,
"loss": 0.5747,
"step": 3990
},
{
"epoch": 1.3873033098209442,
"grad_norm": 0.087890625,
"learning_rate": 0.0001,
"loss": 0.5823,
"step": 3995
},
{
"epoch": 1.3890396093326098,
"grad_norm": 0.1015625,
"learning_rate": 0.0001,
"loss": 0.5549,
"step": 4000
},
{
"epoch": 1.3907759088442757,
"grad_norm": 0.09423828125,
"learning_rate": 0.0001,
"loss": 0.5353,
"step": 4005
},
{
"epoch": 1.3925122083559414,
"grad_norm": 0.1025390625,
"learning_rate": 0.0001,
"loss": 0.5465,
"step": 4010
},
{
"epoch": 1.3942485078676072,
"grad_norm": 0.095703125,
"learning_rate": 0.0001,
"loss": 0.5231,
"step": 4015
},
{
"epoch": 1.3959848073792729,
"grad_norm": 0.099609375,
"learning_rate": 0.0001,
"loss": 0.5563,
"step": 4020
},
{
"epoch": 1.3977211068909388,
"grad_norm": 0.0966796875,
"learning_rate": 0.0001,
"loss": 0.5128,
"step": 4025
},
{
"epoch": 1.3994574064026044,
"grad_norm": 0.10888671875,
"learning_rate": 0.0001,
"loss": 0.5252,
"step": 4030
},
{
"epoch": 1.4011937059142703,
"grad_norm": 0.09814453125,
"learning_rate": 0.0001,
"loss": 0.6021,
"step": 4035
},
{
"epoch": 1.402930005425936,
"grad_norm": 0.09423828125,
"learning_rate": 0.0001,
"loss": 0.5592,
"step": 4040
},
{
"epoch": 1.4046663049376018,
"grad_norm": 0.107421875,
"learning_rate": 0.0001,
"loss": 0.5766,
"step": 4045
},
{
"epoch": 1.4064026044492675,
"grad_norm": 0.08935546875,
"learning_rate": 0.0001,
"loss": 0.5445,
"step": 4050
},
{
"epoch": 1.4081389039609333,
"grad_norm": 0.09423828125,
"learning_rate": 0.0001,
"loss": 0.5558,
"step": 4055
},
{
"epoch": 1.409875203472599,
"grad_norm": 0.09716796875,
"learning_rate": 0.0001,
"loss": 0.5429,
"step": 4060
},
{
"epoch": 1.4116115029842649,
"grad_norm": 0.09326171875,
"learning_rate": 0.0001,
"loss": 0.5271,
"step": 4065
},
{
"epoch": 1.4133478024959305,
"grad_norm": 0.1123046875,
"learning_rate": 0.0001,
"loss": 0.5367,
"step": 4070
},
{
"epoch": 1.4150841020075964,
"grad_norm": 0.1015625,
"learning_rate": 0.0001,
"loss": 0.5236,
"step": 4075
},
{
"epoch": 1.416820401519262,
"grad_norm": 0.099609375,
"learning_rate": 0.0001,
"loss": 0.4968,
"step": 4080
},
{
"epoch": 1.418556701030928,
"grad_norm": 0.10791015625,
"learning_rate": 0.0001,
"loss": 0.5748,
"step": 4085
},
{
"epoch": 1.4202930005425936,
"grad_norm": 0.087890625,
"learning_rate": 0.0001,
"loss": 0.557,
"step": 4090
},
{
"epoch": 1.4220293000542594,
"grad_norm": 0.0966796875,
"learning_rate": 0.0001,
"loss": 0.5561,
"step": 4095
},
{
"epoch": 1.423765599565925,
"grad_norm": 0.0966796875,
"learning_rate": 0.0001,
"loss": 0.5618,
"step": 4100
},
{ |
|
"epoch": 1.425501899077591, |
|
"grad_norm": 0.09912109375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5374, |
|
"step": 4105 |
|
}, |
|
{ |
|
"epoch": 1.4272381985892566, |
|
"grad_norm": 0.0966796875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5433, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 1.4289744981009225, |
|
"grad_norm": 0.09765625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5319, |
|
"step": 4115 |
|
}, |
|
{ |
|
"epoch": 1.4307107976125881, |
|
"grad_norm": 0.099609375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5013, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 1.432447097124254, |
|
"grad_norm": 0.10498046875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5148, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 1.4341833966359196, |
|
"grad_norm": 0.11328125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5227, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 1.4359196961475855, |
|
"grad_norm": 0.099609375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5834, |
|
"step": 4135 |
|
}, |
|
{ |
|
"epoch": 1.4376559956592512, |
|
"grad_norm": 0.09716796875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5549, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 1.439392295170917, |
|
"grad_norm": 0.10595703125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5536, |
|
"step": 4145 |
|
}, |
|
{ |
|
"epoch": 1.4411285946825827, |
|
"grad_norm": 0.09814453125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5376, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 1.4428648941942486, |
|
"grad_norm": 0.0986328125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5382, |
|
"step": 4155 |
|
}, |
|
{ |
|
"epoch": 1.4446011937059142, |
|
"grad_norm": 0.095703125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5469, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 1.44633749321758, |
|
"grad_norm": 0.1044921875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.536, |
|
"step": 4165 |
|
}, |
|
{ |
|
"epoch": 1.4480737927292457, |
|
"grad_norm": 0.0966796875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5217, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 1.4498100922409116, |
|
"grad_norm": 0.09814453125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5243, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 1.4515463917525773, |
|
"grad_norm": 0.109375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5278, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 1.4532826912642431, |
|
"grad_norm": 0.10107421875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6001, |
|
"step": 4185 |
|
}, |
|
{ |
|
"epoch": 1.4550189907759088, |
|
"grad_norm": 0.1474609375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5794, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 1.4567552902875747, |
|
"grad_norm": 0.1015625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5589, |
|
"step": 4195 |
|
}, |
|
{ |
|
"epoch": 1.4584915897992403, |
|
"grad_norm": 0.099609375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.569, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.4602278893109062, |
|
"grad_norm": 0.09619140625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5364, |
|
"step": 4205 |
|
}, |
|
{ |
|
"epoch": 1.4619641888225718, |
|
"grad_norm": 0.09619140625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5103, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 1.4637004883342377, |
|
"grad_norm": 0.0927734375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5303, |
|
"step": 4215 |
|
}, |
|
{ |
|
"epoch": 1.4654367878459034, |
|
"grad_norm": 0.10205078125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5361, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 1.4671730873575692, |
|
"grad_norm": 0.0947265625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5164, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 1.468909386869235, |
|
"grad_norm": 0.103515625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.51, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 1.4706456863809008, |
|
"grad_norm": 0.09765625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5873, |
|
"step": 4235 |
|
}, |
|
{ |
|
"epoch": 1.4723819858925664, |
|
"grad_norm": 0.0927734375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5682, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 1.4741182854042323, |
|
"grad_norm": 0.09326171875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5664, |
|
"step": 4245 |
|
}, |
|
{ |
|
"epoch": 1.475854584915898, |
|
"grad_norm": 0.09521484375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5612, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 1.4775908844275638, |
|
"grad_norm": 0.10205078125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5583, |
|
"step": 4255 |
|
}, |
|
{ |
|
"epoch": 1.4793271839392295, |
|
"grad_norm": 0.09423828125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5491, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 1.4810634834508953, |
|
"grad_norm": 0.1044921875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5347, |
|
"step": 4265 |
|
}, |
|
{ |
|
"epoch": 1.482799782962561, |
|
"grad_norm": 0.1044921875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5397, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 1.4845360824742269, |
|
"grad_norm": 0.10107421875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5408, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 1.4862723819858925, |
|
"grad_norm": 0.10986328125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.4996, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 1.4880086814975584, |
|
"grad_norm": 0.11083984375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.571, |
|
"step": 4285 |
|
}, |
|
{ |
|
"epoch": 1.489744981009224, |
|
"grad_norm": 0.09814453125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5733, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 1.49148128052089, |
|
"grad_norm": 0.0927734375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5844, |
|
"step": 4295 |
|
}, |
|
{ |
|
"epoch": 1.4932175800325556, |
|
"grad_norm": 0.09716796875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5536, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 1.4949538795442214, |
|
"grad_norm": 0.0966796875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5541, |
|
"step": 4305 |
|
}, |
|
{ |
|
"epoch": 1.496690179055887, |
|
"grad_norm": 0.09228515625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5393, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 1.498426478567553, |
|
"grad_norm": 0.099609375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5572, |
|
"step": 4315 |
|
}, |
|
{ |
|
"epoch": 1.5001627780792188, |
|
"grad_norm": 0.10595703125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5208, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 1.5018990775908845, |
|
"grad_norm": 0.11279296875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5271, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 1.5036353771025501, |
|
"grad_norm": 0.1064453125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.505, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 1.505371676614216, |
|
"grad_norm": 0.1015625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5897, |
|
"step": 4335 |
|
}, |
|
{ |
|
"epoch": 1.5071079761258819, |
|
"grad_norm": 0.09130859375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5585, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 1.5088442756375475, |
|
"grad_norm": 0.0927734375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5582, |
|
"step": 4345 |
|
}, |
|
{ |
|
"epoch": 1.5105805751492132, |
|
"grad_norm": 0.0986328125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5505, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 1.512316874660879, |
|
"grad_norm": 0.09814453125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5565, |
|
"step": 4355 |
|
}, |
|
{ |
|
"epoch": 1.514053174172545, |
|
"grad_norm": 0.0927734375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5449, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 1.5157894736842106, |
|
"grad_norm": 0.09765625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5326, |
|
"step": 4365 |
|
}, |
|
{ |
|
"epoch": 1.5175257731958762, |
|
"grad_norm": 0.1064453125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5193, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 1.519262072707542, |
|
"grad_norm": 0.1123046875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5321, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 1.520998372219208, |
|
"grad_norm": 0.11083984375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5121, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 1.5227346717308736, |
|
"grad_norm": 0.09619140625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5821, |
|
"step": 4385 |
|
}, |
|
{ |
|
"epoch": 1.5244709712425393, |
|
"grad_norm": 0.0927734375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5658, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 1.5262072707542051, |
|
"grad_norm": 0.265625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5667, |
|
"step": 4395 |
|
}, |
|
{ |
|
"epoch": 1.527943570265871, |
|
"grad_norm": 0.0947265625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5378, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.5296798697775367, |
|
"grad_norm": 0.0947265625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.545, |
|
"step": 4405 |
|
}, |
|
{ |
|
"epoch": 1.5314161692892023, |
|
"grad_norm": 0.09228515625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5256, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 1.5331524688008682, |
|
"grad_norm": 0.10986328125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5235, |
|
"step": 4415 |
|
}, |
|
{ |
|
"epoch": 1.534888768312534, |
|
"grad_norm": 0.1005859375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.534, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 1.5366250678241997, |
|
"grad_norm": 0.10888671875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5229, |
|
"step": 4425 |
|
}, |
|
{ |
|
"epoch": 1.5383613673358654, |
|
"grad_norm": 0.10302734375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.4966, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 1.5400976668475312, |
|
"grad_norm": 0.0986328125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5806, |
|
"step": 4435 |
|
}, |
|
{ |
|
"epoch": 1.5418339663591971, |
|
"grad_norm": 0.08935546875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5782, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 1.5435702658708628, |
|
"grad_norm": 0.0966796875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.554, |
|
"step": 4445 |
|
}, |
|
{ |
|
"epoch": 1.5453065653825284, |
|
"grad_norm": 0.09765625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5614, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 1.5470428648941943, |
|
"grad_norm": 0.09130859375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.528, |
|
"step": 4455 |
|
}, |
|
{ |
|
"epoch": 1.5487791644058602, |
|
"grad_norm": 0.09619140625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5402, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 1.5505154639175258, |
|
"grad_norm": 0.0966796875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.4931, |
|
"step": 4465 |
|
}, |
|
{ |
|
"epoch": 1.5522517634291915, |
|
"grad_norm": 0.09912109375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5183, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 1.5539880629408573, |
|
"grad_norm": 0.109375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5075, |
|
"step": 4475 |
|
}, |
|
{ |
|
"epoch": 1.5557243624525232, |
|
"grad_norm": 0.1064453125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5028, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 1.5574606619641889, |
|
"grad_norm": 0.1044921875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5985, |
|
"step": 4485 |
|
}, |
|
{ |
|
"epoch": 1.5591969614758545, |
|
"grad_norm": 0.08984375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5599, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 1.5609332609875204, |
|
"grad_norm": 0.09033203125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5674, |
|
"step": 4495 |
|
}, |
|
{ |
|
"epoch": 1.5626695604991863, |
|
"grad_norm": 0.10302734375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5414, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.5626695604991863, |
|
"step": 4500, |
|
"total_flos": 4.510419270260736e+18, |
|
"train_loss": 0.5746156393686931, |
|
"train_runtime": 211364.5698, |
|
"train_samples_per_second": 1.363, |
|
"train_steps_per_second": 0.021 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 4500, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 90, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.510419270260736e+18, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|