|
{ |
|
"best_metric": { |
|
"accuracy": 0.9902156155185724 |
|
}, |
|
"best_model_checkpoint": "./results/checkpoint-69627", |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 69627, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0007181122265787698, |
|
"grad_norm": 4.7540483474731445, |
|
"learning_rate": 1.5e-06, |
|
"loss": 0.7343, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0014362244531575395, |
|
"grad_norm": 3.270306348800659, |
|
"learning_rate": 2.9700000000000004e-06, |
|
"loss": 0.6002, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.002154336679736309, |
|
"grad_norm": 8.934218406677246, |
|
"learning_rate": 4.4699999999999996e-06, |
|
"loss": 0.4248, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.002872448906315079, |
|
"grad_norm": 2.0825181007385254, |
|
"learning_rate": 5.940000000000001e-06, |
|
"loss": 0.3267, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.0035905611328938486, |
|
"grad_norm": 22.761402130126953, |
|
"learning_rate": 7.44e-06, |
|
"loss": 0.2673, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.004308673359472618, |
|
"grad_norm": 14.120667457580566, |
|
"learning_rate": 8.91e-06, |
|
"loss": 0.1968, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.005026785586051388, |
|
"grad_norm": 30.47674560546875, |
|
"learning_rate": 1.0379999999999999e-05, |
|
"loss": 0.2866, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.005744897812630158, |
|
"grad_norm": 4.9884772300720215, |
|
"learning_rate": 1.1880000000000001e-05, |
|
"loss": 0.1806, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.006463010039208928, |
|
"grad_norm": 7.104131698608398, |
|
"learning_rate": 1.338e-05, |
|
"loss": 0.1843, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.007181122265787697, |
|
"grad_norm": 9.267885208129883, |
|
"learning_rate": 1.488e-05, |
|
"loss": 0.2175, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.007899234492366467, |
|
"grad_norm": 0.08351179212331772, |
|
"learning_rate": 1.6380000000000002e-05, |
|
"loss": 0.1864, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.008617346718945236, |
|
"grad_norm": 4.860571384429932, |
|
"learning_rate": 1.7879999999999998e-05, |
|
"loss": 0.1599, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.009335458945524006, |
|
"grad_norm": 19.020278930664062, |
|
"learning_rate": 1.938e-05, |
|
"loss": 0.2149, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.010053571172102775, |
|
"grad_norm": 21.864152908325195, |
|
"learning_rate": 2.088e-05, |
|
"loss": 0.1844, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.010771683398681547, |
|
"grad_norm": 13.073183059692383, |
|
"learning_rate": 2.238e-05, |
|
"loss": 0.1342, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.011489795625260316, |
|
"grad_norm": 4.226386547088623, |
|
"learning_rate": 2.3880000000000002e-05, |
|
"loss": 0.1602, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.012207907851839086, |
|
"grad_norm": 6.574745178222656, |
|
"learning_rate": 2.538e-05, |
|
"loss": 0.1298, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.012926020078417855, |
|
"grad_norm": 1.3294140100479126, |
|
"learning_rate": 2.688e-05, |
|
"loss": 0.1627, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.013644132304996625, |
|
"grad_norm": 15.410859107971191, |
|
"learning_rate": 2.838e-05, |
|
"loss": 0.1802, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.014362244531575395, |
|
"grad_norm": 7.947683811187744, |
|
"learning_rate": 2.9880000000000002e-05, |
|
"loss": 0.1519, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.015080356758154164, |
|
"grad_norm": 9.371002197265625, |
|
"learning_rate": 2.9993361586676995e-05, |
|
"loss": 0.1402, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.015798468984732934, |
|
"grad_norm": 0.8048446774482727, |
|
"learning_rate": 2.998614592002155e-05, |
|
"loss": 0.1335, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.016516581211311703, |
|
"grad_norm": 1.139202356338501, |
|
"learning_rate": 2.997893025336611e-05, |
|
"loss": 0.1234, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.017234693437890473, |
|
"grad_norm": 20.09372901916504, |
|
"learning_rate": 2.9971714586710667e-05, |
|
"loss": 0.2036, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.017952805664469242, |
|
"grad_norm": 50.06760787963867, |
|
"learning_rate": 2.9964498920055226e-05, |
|
"loss": 0.1759, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.018670917891048012, |
|
"grad_norm": 9.23760986328125, |
|
"learning_rate": 2.995728325339978e-05, |
|
"loss": 0.0863, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.01938903011762678, |
|
"grad_norm": 49.052547454833984, |
|
"learning_rate": 2.995006758674434e-05, |
|
"loss": 0.0894, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.02010714234420555, |
|
"grad_norm": 29.987974166870117, |
|
"learning_rate": 2.99428519200889e-05, |
|
"loss": 0.0983, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.02082525457078432, |
|
"grad_norm": 23.74324607849121, |
|
"learning_rate": 2.9935636253433458e-05, |
|
"loss": 0.1495, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.021543366797363094, |
|
"grad_norm": 11.304362297058105, |
|
"learning_rate": 2.9928420586778013e-05, |
|
"loss": 0.1215, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.022261479023941863, |
|
"grad_norm": 0.33057159185409546, |
|
"learning_rate": 2.992120492012257e-05, |
|
"loss": 0.1711, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.022979591250520633, |
|
"grad_norm": 25.56655502319336, |
|
"learning_rate": 2.991398925346713e-05, |
|
"loss": 0.1592, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.023697703477099402, |
|
"grad_norm": 16.568450927734375, |
|
"learning_rate": 2.9906773586811686e-05, |
|
"loss": 0.1175, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.02441581570367817, |
|
"grad_norm": 70.81444549560547, |
|
"learning_rate": 2.9899557920156244e-05, |
|
"loss": 0.0847, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.02513392793025694, |
|
"grad_norm": 7.089319705963135, |
|
"learning_rate": 2.9892342253500803e-05, |
|
"loss": 0.1262, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.02585204015683571, |
|
"grad_norm": 0.050011906772851944, |
|
"learning_rate": 2.988512658684536e-05, |
|
"loss": 0.0937, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.02657015238341448, |
|
"grad_norm": 25.49470329284668, |
|
"learning_rate": 2.9877910920189917e-05, |
|
"loss": 0.1614, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.02728826460999325, |
|
"grad_norm": 0.3818345367908478, |
|
"learning_rate": 2.9870695253534476e-05, |
|
"loss": 0.1194, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.02800637683657202, |
|
"grad_norm": 9.010673522949219, |
|
"learning_rate": 2.986347958687903e-05, |
|
"loss": 0.0894, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.02872448906315079, |
|
"grad_norm": 18.90770721435547, |
|
"learning_rate": 2.985626392022359e-05, |
|
"loss": 0.1395, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.02944260128972956, |
|
"grad_norm": 0.030203022062778473, |
|
"learning_rate": 2.984904825356815e-05, |
|
"loss": 0.115, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.030160713516308328, |
|
"grad_norm": 0.0564361996948719, |
|
"learning_rate": 2.9841832586912704e-05, |
|
"loss": 0.0639, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.030878825742887098, |
|
"grad_norm": 0.019318081438541412, |
|
"learning_rate": 2.9834616920257266e-05, |
|
"loss": 0.0859, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.03159693796946587, |
|
"grad_norm": 0.49546706676483154, |
|
"learning_rate": 2.982754556693493e-05, |
|
"loss": 0.1413, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.03231505019604464, |
|
"grad_norm": 0.03695172816514969, |
|
"learning_rate": 2.9820329900279487e-05, |
|
"loss": 0.0901, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.033033162422623406, |
|
"grad_norm": 0.016125192865729332, |
|
"learning_rate": 2.9813114233624046e-05, |
|
"loss": 0.0813, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.033751274649202176, |
|
"grad_norm": 57.18072509765625, |
|
"learning_rate": 2.9805898566968605e-05, |
|
"loss": 0.095, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.034469386875780945, |
|
"grad_norm": 4.513989448547363, |
|
"learning_rate": 2.979868290031316e-05, |
|
"loss": 0.1273, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.035187499102359715, |
|
"grad_norm": 10.933919906616211, |
|
"learning_rate": 2.979146723365772e-05, |
|
"loss": 0.0767, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.035905611328938485, |
|
"grad_norm": 83.93428802490234, |
|
"learning_rate": 2.9784251567002277e-05, |
|
"loss": 0.1061, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.036623723555517254, |
|
"grad_norm": 16.953140258789062, |
|
"learning_rate": 2.9777035900346833e-05, |
|
"loss": 0.1024, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.037341835782096024, |
|
"grad_norm": 17.177425384521484, |
|
"learning_rate": 2.976982023369139e-05, |
|
"loss": 0.1049, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.03805994800867479, |
|
"grad_norm": 0.028152592480182648, |
|
"learning_rate": 2.976260456703595e-05, |
|
"loss": 0.0597, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.03877806023525356, |
|
"grad_norm": 22.621421813964844, |
|
"learning_rate": 2.9755388900380506e-05, |
|
"loss": 0.1103, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.03949617246183233, |
|
"grad_norm": 0.2816521227359772, |
|
"learning_rate": 2.9748173233725064e-05, |
|
"loss": 0.0794, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.0402142846884111, |
|
"grad_norm": 0.068883016705513, |
|
"learning_rate": 2.9740957567069623e-05, |
|
"loss": 0.0844, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.04093239691498987, |
|
"grad_norm": 0.06445488333702087, |
|
"learning_rate": 2.973374190041418e-05, |
|
"loss": 0.0703, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.04165050914156864, |
|
"grad_norm": 0.15849824249744415, |
|
"learning_rate": 2.9726526233758737e-05, |
|
"loss": 0.0717, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.04236862136814742, |
|
"grad_norm": 0.08682260662317276, |
|
"learning_rate": 2.9719310567103296e-05, |
|
"loss": 0.0947, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.04308673359472619, |
|
"grad_norm": 20.855480194091797, |
|
"learning_rate": 2.971209490044785e-05, |
|
"loss": 0.095, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.04380484582130496, |
|
"grad_norm": 0.009179933927953243, |
|
"learning_rate": 2.9704879233792413e-05, |
|
"loss": 0.0681, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.044522958047883726, |
|
"grad_norm": 1.1233855485916138, |
|
"learning_rate": 2.969766356713697e-05, |
|
"loss": 0.1191, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.045241070274462496, |
|
"grad_norm": 7.719324588775635, |
|
"learning_rate": 2.9690447900481524e-05, |
|
"loss": 0.114, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.045959182501041265, |
|
"grad_norm": 0.08133476972579956, |
|
"learning_rate": 2.9683232233826086e-05, |
|
"loss": 0.0883, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.046677294727620035, |
|
"grad_norm": 0.15997205674648285, |
|
"learning_rate": 2.967601656717064e-05, |
|
"loss": 0.0624, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.047395406954198804, |
|
"grad_norm": 30.50885581970215, |
|
"learning_rate": 2.9668800900515197e-05, |
|
"loss": 0.0792, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.048113519180777574, |
|
"grad_norm": 0.03833446651697159, |
|
"learning_rate": 2.966158523385976e-05, |
|
"loss": 0.0655, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.04883163140735634, |
|
"grad_norm": 0.020543202757835388, |
|
"learning_rate": 2.9654369567204314e-05, |
|
"loss": 0.0648, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.04954974363393511, |
|
"grad_norm": 0.18718767166137695, |
|
"learning_rate": 2.9647153900548873e-05, |
|
"loss": 0.0722, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.05026785586051388, |
|
"grad_norm": 0.038752757012844086, |
|
"learning_rate": 2.963993823389343e-05, |
|
"loss": 0.088, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.05098596808709265, |
|
"grad_norm": 0.5428361296653748, |
|
"learning_rate": 2.9632722567237987e-05, |
|
"loss": 0.1104, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.05170408031367142, |
|
"grad_norm": 29.898271560668945, |
|
"learning_rate": 2.9625506900582546e-05, |
|
"loss": 0.0972, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.05242219254025019, |
|
"grad_norm": 8.866761207580566, |
|
"learning_rate": 2.9618291233927104e-05, |
|
"loss": 0.0844, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.05314030476682896, |
|
"grad_norm": 0.06850434094667435, |
|
"learning_rate": 2.961107556727166e-05, |
|
"loss": 0.0914, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.05385841699340773, |
|
"grad_norm": 33.26413345336914, |
|
"learning_rate": 2.9603859900616222e-05, |
|
"loss": 0.0927, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.0545765292199865, |
|
"grad_norm": 0.015440431423485279, |
|
"learning_rate": 2.9596644233960777e-05, |
|
"loss": 0.0575, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.05529464144656527, |
|
"grad_norm": 0.16492103040218353, |
|
"learning_rate": 2.9589428567305332e-05, |
|
"loss": 0.0696, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.05601275367314404, |
|
"grad_norm": 0.020445317029953003, |
|
"learning_rate": 2.9582212900649895e-05, |
|
"loss": 0.052, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.05673086589972281, |
|
"grad_norm": 0.12008040398359299, |
|
"learning_rate": 2.957499723399445e-05, |
|
"loss": 0.0864, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.05744897812630158, |
|
"grad_norm": 10.394414901733398, |
|
"learning_rate": 2.9567781567339005e-05, |
|
"loss": 0.0877, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.05816709035288035, |
|
"grad_norm": 33.755489349365234, |
|
"learning_rate": 2.9560565900683567e-05, |
|
"loss": 0.0833, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.05888520257945912, |
|
"grad_norm": 33.599884033203125, |
|
"learning_rate": 2.9553350234028123e-05, |
|
"loss": 0.0758, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.05960331480603789, |
|
"grad_norm": 0.05187524855136871, |
|
"learning_rate": 2.9546134567372678e-05, |
|
"loss": 0.052, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.060321427032616656, |
|
"grad_norm": 17.136632919311523, |
|
"learning_rate": 2.953891890071724e-05, |
|
"loss": 0.0543, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.061039539259195426, |
|
"grad_norm": 0.02133137546479702, |
|
"learning_rate": 2.9531703234061795e-05, |
|
"loss": 0.066, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.061757651485774195, |
|
"grad_norm": 0.014079502783715725, |
|
"learning_rate": 2.952448756740635e-05, |
|
"loss": 0.055, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.062475763712352965, |
|
"grad_norm": 0.07812543213367462, |
|
"learning_rate": 2.9517271900750913e-05, |
|
"loss": 0.0507, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.06319387593893173, |
|
"grad_norm": 13.165999412536621, |
|
"learning_rate": 2.9510056234095468e-05, |
|
"loss": 0.0739, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.06391198816551051, |
|
"grad_norm": 14.774604797363281, |
|
"learning_rate": 2.9502840567440027e-05, |
|
"loss": 0.0572, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.06463010039208927, |
|
"grad_norm": 0.04285166785120964, |
|
"learning_rate": 2.9495624900784586e-05, |
|
"loss": 0.0737, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.06534821261866805, |
|
"grad_norm": 0.0385599248111248, |
|
"learning_rate": 2.948840923412914e-05, |
|
"loss": 0.0651, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.06606632484524681, |
|
"grad_norm": 8.97938060760498, |
|
"learning_rate": 2.94811935674737e-05, |
|
"loss": 0.1029, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.06678443707182559, |
|
"grad_norm": 14.360940933227539, |
|
"learning_rate": 2.947397790081826e-05, |
|
"loss": 0.0861, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.06750254929840435, |
|
"grad_norm": 0.021648872643709183, |
|
"learning_rate": 2.9466762234162814e-05, |
|
"loss": 0.0503, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.06822066152498313, |
|
"grad_norm": 12.358551025390625, |
|
"learning_rate": 2.9459546567507373e-05, |
|
"loss": 0.0476, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.06893877375156189, |
|
"grad_norm": 0.5937502384185791, |
|
"learning_rate": 2.945233090085193e-05, |
|
"loss": 0.0833, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.06965688597814067, |
|
"grad_norm": 0.009575131349265575, |
|
"learning_rate": 2.9445115234196487e-05, |
|
"loss": 0.0488, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.07037499820471943, |
|
"grad_norm": 0.027700720354914665, |
|
"learning_rate": 2.9437899567541045e-05, |
|
"loss": 0.0726, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.0710931104312982, |
|
"grad_norm": 0.06248120963573456, |
|
"learning_rate": 2.9430683900885604e-05, |
|
"loss": 0.0346, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.07181122265787697, |
|
"grad_norm": 0.047881875187158585, |
|
"learning_rate": 2.942361254756327e-05, |
|
"loss": 0.0844, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.07252933488445575, |
|
"grad_norm": 0.21929340064525604, |
|
"learning_rate": 2.9416396880907825e-05, |
|
"loss": 0.0622, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 0.07324744711103451, |
|
"grad_norm": 0.07342693209648132, |
|
"learning_rate": 2.9409181214252387e-05, |
|
"loss": 0.1072, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.07396555933761328, |
|
"grad_norm": 0.028538711369037628, |
|
"learning_rate": 2.9401965547596943e-05, |
|
"loss": 0.0655, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 0.07468367156419205, |
|
"grad_norm": 0.03597363457083702, |
|
"learning_rate": 2.9394749880941498e-05, |
|
"loss": 0.027, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.07540178379077082, |
|
"grad_norm": 0.011987659148871899, |
|
"learning_rate": 2.938753421428606e-05, |
|
"loss": 0.1305, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.07611989601734959, |
|
"grad_norm": 1.634634017944336, |
|
"learning_rate": 2.9380318547630615e-05, |
|
"loss": 0.0666, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.07683800824392836, |
|
"grad_norm": 5.277307987213135, |
|
"learning_rate": 2.9373102880975174e-05, |
|
"loss": 0.0612, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 0.07755612047050713, |
|
"grad_norm": 9.484989166259766, |
|
"learning_rate": 2.9365887214319733e-05, |
|
"loss": 0.111, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.0782742326970859, |
|
"grad_norm": 0.1096024438738823, |
|
"learning_rate": 2.9358671547664288e-05, |
|
"loss": 0.0833, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 0.07899234492366466, |
|
"grad_norm": 13.125753402709961, |
|
"learning_rate": 2.9351455881008847e-05, |
|
"loss": 0.1178, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.07971045715024344, |
|
"grad_norm": 0.16206516325473785, |
|
"learning_rate": 2.9344240214353406e-05, |
|
"loss": 0.0921, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 0.0804285693768222, |
|
"grad_norm": 0.3482252061367035, |
|
"learning_rate": 2.933702454769796e-05, |
|
"loss": 0.0682, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.08114668160340098, |
|
"grad_norm": 5.2325944900512695, |
|
"learning_rate": 2.932980888104252e-05, |
|
"loss": 0.0622, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 0.08186479382997974, |
|
"grad_norm": 0.03085625357925892, |
|
"learning_rate": 2.932259321438708e-05, |
|
"loss": 0.0501, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.08258290605655852, |
|
"grad_norm": 10.35268783569336, |
|
"learning_rate": 2.9315377547731634e-05, |
|
"loss": 0.0657, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 0.08330101828313728, |
|
"grad_norm": 0.03741024062037468, |
|
"learning_rate": 2.9308161881076196e-05, |
|
"loss": 0.0579, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.08401913050971606, |
|
"grad_norm": 22.895612716674805, |
|
"learning_rate": 2.930094621442075e-05, |
|
"loss": 0.0745, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 0.08473724273629483, |
|
"grad_norm": 0.12802597880363464, |
|
"learning_rate": 2.9293730547765307e-05, |
|
"loss": 0.0595, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.0854553549628736, |
|
"grad_norm": 0.1966264247894287, |
|
"learning_rate": 2.928651488110987e-05, |
|
"loss": 0.0807, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 0.08617346718945237, |
|
"grad_norm": 0.4157123267650604, |
|
"learning_rate": 2.9279299214454424e-05, |
|
"loss": 0.118, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.08689157941603114, |
|
"grad_norm": 0.08162178844213486, |
|
"learning_rate": 2.9272083547798983e-05, |
|
"loss": 0.0726, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 0.08760969164260991, |
|
"grad_norm": 0.13167402148246765, |
|
"learning_rate": 2.926486788114354e-05, |
|
"loss": 0.0753, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.08832780386918868, |
|
"grad_norm": 0.12660837173461914, |
|
"learning_rate": 2.9257652214488097e-05, |
|
"loss": 0.0742, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 0.08904591609576745, |
|
"grad_norm": 30.069957733154297, |
|
"learning_rate": 2.9250436547832655e-05, |
|
"loss": 0.0372, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.08976402832234621, |
|
"grad_norm": 0.09002042561769485, |
|
"learning_rate": 2.9243220881177214e-05, |
|
"loss": 0.0892, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 0.09048214054892499, |
|
"grad_norm": 2.681645631790161, |
|
"learning_rate": 2.923600521452177e-05, |
|
"loss": 0.044, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.09120025277550375, |
|
"grad_norm": 0.08975830674171448, |
|
"learning_rate": 2.9228789547866328e-05, |
|
"loss": 0.0683, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 0.09191836500208253, |
|
"grad_norm": 1.0190995931625366, |
|
"learning_rate": 2.9221573881210887e-05, |
|
"loss": 0.0758, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.0926364772286613, |
|
"grad_norm": 13.023914337158203, |
|
"learning_rate": 2.9214358214555442e-05, |
|
"loss": 0.0743, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 0.09335458945524007, |
|
"grad_norm": 0.027975155040621758, |
|
"learning_rate": 2.92071425479e-05, |
|
"loss": 0.0469, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.09407270168181883, |
|
"grad_norm": 0.05313028395175934, |
|
"learning_rate": 2.919992688124456e-05, |
|
"loss": 0.029, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 0.09479081390839761, |
|
"grad_norm": 0.10678339004516602, |
|
"learning_rate": 2.9192711214589115e-05, |
|
"loss": 0.0258, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.09550892613497637, |
|
"grad_norm": 9.206793785095215, |
|
"learning_rate": 2.9185495547933674e-05, |
|
"loss": 0.1138, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 0.09622703836155515, |
|
"grad_norm": 0.13147731125354767, |
|
"learning_rate": 2.9178279881278233e-05, |
|
"loss": 0.0678, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.09694515058813391, |
|
"grad_norm": 0.058656755834817886, |
|
"learning_rate": 2.917106421462279e-05, |
|
"loss": 0.0628, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 0.09766326281471269, |
|
"grad_norm": 0.06019889935851097, |
|
"learning_rate": 2.9163848547967347e-05, |
|
"loss": 0.0713, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.09838137504129145, |
|
"grad_norm": 1.6211074590682983, |
|
"learning_rate": 2.9156777194645016e-05, |
|
"loss": 0.0518, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 0.09909948726787023, |
|
"grad_norm": 48.71439743041992, |
|
"learning_rate": 2.914956152798957e-05, |
|
"loss": 0.0585, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.09981759949444899, |
|
"grad_norm": 7.728841304779053, |
|
"learning_rate": 2.914234586133413e-05, |
|
"loss": 0.0953, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 0.10053571172102777, |
|
"grad_norm": 0.023901065811514854, |
|
"learning_rate": 2.913513019467869e-05, |
|
"loss": 0.0722, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.10125382394760653, |
|
"grad_norm": 0.10294115543365479, |
|
"learning_rate": 2.9127914528023244e-05, |
|
"loss": 0.036, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 0.1019719361741853, |
|
"grad_norm": 0.011413712054491043, |
|
"learning_rate": 2.9120698861367803e-05, |
|
"loss": 0.0754, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.10269004840076407, |
|
"grad_norm": 0.11528253555297852, |
|
"learning_rate": 2.911348319471236e-05, |
|
"loss": 0.0543, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 0.10340816062734284, |
|
"grad_norm": 0.10068734735250473, |
|
"learning_rate": 2.9106267528056917e-05, |
|
"loss": 0.0873, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.1041262728539216, |
|
"grad_norm": 0.0316169336438179, |
|
"learning_rate": 2.9099051861401475e-05, |
|
"loss": 0.0541, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 0.10484438508050038, |
|
"grad_norm": 9.961137771606445, |
|
"learning_rate": 2.9091836194746034e-05, |
|
"loss": 0.0497, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.10556249730707915, |
|
"grad_norm": 0.08653315156698227, |
|
"learning_rate": 2.908462052809059e-05, |
|
"loss": 0.065, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 0.10628060953365792, |
|
"grad_norm": 0.13973358273506165, |
|
"learning_rate": 2.9077404861435148e-05, |
|
"loss": 0.094, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.10699872176023668, |
|
"grad_norm": 2.6138124465942383, |
|
"learning_rate": 2.9070189194779707e-05, |
|
"loss": 0.0518, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 0.10771683398681546, |
|
"grad_norm": 0.01820351369678974, |
|
"learning_rate": 2.9062973528124262e-05, |
|
"loss": 0.0491, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.10843494621339422, |
|
"grad_norm": 0.34972327947616577, |
|
"learning_rate": 2.905575786146882e-05, |
|
"loss": 0.0902, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 0.109153058439973, |
|
"grad_norm": 0.09330655634403229, |
|
"learning_rate": 2.904854219481338e-05, |
|
"loss": 0.0775, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.10987117066655176, |
|
"grad_norm": 20.732421875, |
|
"learning_rate": 2.904132652815794e-05, |
|
"loss": 0.0485, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 0.11058928289313054, |
|
"grad_norm": 0.10912938416004181, |
|
"learning_rate": 2.9034110861502494e-05, |
|
"loss": 0.0548, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.1113073951197093, |
|
"grad_norm": 0.05953862518072128, |
|
"learning_rate": 2.9026895194847052e-05, |
|
"loss": 0.051, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 0.11202550734628808, |
|
"grad_norm": 0.03613553196191788, |
|
"learning_rate": 2.901967952819161e-05, |
|
"loss": 0.0576, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.11274361957286685, |
|
"grad_norm": 0.013742661103606224, |
|
"learning_rate": 2.9012463861536166e-05, |
|
"loss": 0.0435, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 0.11346173179944562, |
|
"grad_norm": 14.854754447937012, |
|
"learning_rate": 2.9005248194880725e-05, |
|
"loss": 0.0588, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.1141798440260244, |
|
"grad_norm": 0.025177430361509323, |
|
"learning_rate": 2.8998032528225284e-05, |
|
"loss": 0.049, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 0.11489795625260316, |
|
"grad_norm": 0.10366187989711761, |
|
"learning_rate": 2.8990816861569843e-05, |
|
"loss": 0.0527, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.11561606847918193, |
|
"grad_norm": 17.234394073486328, |
|
"learning_rate": 2.8983601194914398e-05, |
|
"loss": 0.0496, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 0.1163341807057607, |
|
"grad_norm": 0.20396387577056885, |
|
"learning_rate": 2.8976385528258957e-05, |
|
"loss": 0.0641, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.11705229293233947, |
|
"grad_norm": 0.016371482983231544, |
|
"learning_rate": 2.8969169861603515e-05, |
|
"loss": 0.0379, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 0.11777040515891823, |
|
"grad_norm": 0.06216276437044144, |
|
"learning_rate": 2.896195419494807e-05, |
|
"loss": 0.0565, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.11848851738549701, |
|
"grad_norm": 0.18494826555252075, |
|
"learning_rate": 2.895473852829263e-05, |
|
"loss": 0.0599, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 0.11920662961207577, |
|
"grad_norm": 0.012996107339859009, |
|
"learning_rate": 2.8947522861637188e-05, |
|
"loss": 0.0361, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.11992474183865455, |
|
"grad_norm": 26.88838768005371, |
|
"learning_rate": 2.8940307194981747e-05, |
|
"loss": 0.0699, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 0.12064285406523331, |
|
"grad_norm": 0.03782937675714493, |
|
"learning_rate": 2.8933091528326302e-05, |
|
"loss": 0.0327, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.12136096629181209, |
|
"grad_norm": 0.055766601115465164, |
|
"learning_rate": 2.892587586167086e-05, |
|
"loss": 0.0747, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 0.12207907851839085, |
|
"grad_norm": 0.009904072619974613, |
|
"learning_rate": 2.891866019501542e-05, |
|
"loss": 0.0611, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.12279719074496963, |
|
"grad_norm": 0.06229407340288162, |
|
"learning_rate": 2.8911444528359975e-05, |
|
"loss": 0.1479, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 0.12351530297154839, |
|
"grad_norm": 0.033349085599184036, |
|
"learning_rate": 2.8904228861704534e-05, |
|
"loss": 0.0442, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.12423341519812717, |
|
"grad_norm": 0.04250495880842209, |
|
"learning_rate": 2.8897013195049093e-05, |
|
"loss": 0.0646, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 0.12495152742470593, |
|
"grad_norm": 0.07844238728284836, |
|
"learning_rate": 2.8889797528393648e-05, |
|
"loss": 0.0732, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.1256696396512847, |
|
"grad_norm": 3.7660670280456543, |
|
"learning_rate": 2.8882581861738207e-05, |
|
"loss": 0.0612, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 0.12638775187786347, |
|
"grad_norm": 0.031977858394384384, |
|
"learning_rate": 2.8875366195082765e-05, |
|
"loss": 0.0511, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.12710586410444225, |
|
"grad_norm": 0.3466741442680359, |
|
"learning_rate": 2.886815052842732e-05, |
|
"loss": 0.0423, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 0.12782397633102102, |
|
"grad_norm": 0.314433753490448, |
|
"learning_rate": 2.886093486177188e-05, |
|
"loss": 0.0374, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.12854208855759977, |
|
"grad_norm": 0.007800741121172905, |
|
"learning_rate": 2.8853719195116438e-05, |
|
"loss": 0.0683, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 0.12926020078417855, |
|
"grad_norm": 0.929775595664978, |
|
"learning_rate": 2.8846503528460993e-05, |
|
"loss": 0.0772, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.12997831301075732, |
|
"grad_norm": 0.35541367530822754, |
|
"learning_rate": 2.8839287861805556e-05, |
|
"loss": 0.0577, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 0.1306964252373361, |
|
"grad_norm": 1.530603051185608, |
|
"learning_rate": 2.883207219515011e-05, |
|
"loss": 0.0563, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.13141453746391485, |
|
"grad_norm": 53.78153991699219, |
|
"learning_rate": 2.8824856528494666e-05, |
|
"loss": 0.0611, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 0.13213264969049363, |
|
"grad_norm": 2.021066427230835, |
|
"learning_rate": 2.8817640861839228e-05, |
|
"loss": 0.067, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.1328507619170724, |
|
"grad_norm": 0.058540359139442444, |
|
"learning_rate": 2.8810425195183784e-05, |
|
"loss": 0.0431, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 0.13356887414365118, |
|
"grad_norm": 3.8294928073883057, |
|
"learning_rate": 2.880320952852834e-05, |
|
"loss": 0.0588, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.13428698637022993, |
|
"grad_norm": 0.44677820801734924, |
|
"learning_rate": 2.87959938618729e-05, |
|
"loss": 0.0694, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 0.1350050985968087, |
|
"grad_norm": 0.11883492767810822, |
|
"learning_rate": 2.8788778195217456e-05, |
|
"loss": 0.0562, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.13572321082338748, |
|
"grad_norm": 26.162769317626953, |
|
"learning_rate": 2.8781562528562012e-05, |
|
"loss": 0.1393, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 0.13644132304996626, |
|
"grad_norm": 0.02398838847875595, |
|
"learning_rate": 2.8774346861906574e-05, |
|
"loss": 0.0499, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.137159435276545, |
|
"grad_norm": 6.311274528503418, |
|
"learning_rate": 2.876713119525113e-05, |
|
"loss": 0.0582, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 0.13787754750312378, |
|
"grad_norm": 0.06905557215213776, |
|
"learning_rate": 2.8759915528595685e-05, |
|
"loss": 0.0475, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.13859565972970256, |
|
"grad_norm": 0.012767783366143703, |
|
"learning_rate": 2.8752699861940247e-05, |
|
"loss": 0.0268, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 0.13931377195628133, |
|
"grad_norm": 0.07180549949407578, |
|
"learning_rate": 2.8745484195284802e-05, |
|
"loss": 0.0566, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.14003188418286008, |
|
"grad_norm": 0.13380548357963562, |
|
"learning_rate": 2.873826852862936e-05, |
|
"loss": 0.0714, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 0.14074999640943886, |
|
"grad_norm": 0.1076226532459259, |
|
"learning_rate": 2.873105286197392e-05, |
|
"loss": 0.0925, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.14146810863601764, |
|
"grad_norm": 0.09091708809137344, |
|
"learning_rate": 2.8723837195318475e-05, |
|
"loss": 0.0468, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 0.1421862208625964, |
|
"grad_norm": 0.049373432993888855, |
|
"learning_rate": 2.8716621528663033e-05, |
|
"loss": 0.0672, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.1429043330891752, |
|
"grad_norm": 0.0634181797504425, |
|
"learning_rate": 2.8709405862007592e-05, |
|
"loss": 0.0724, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 0.14362244531575394, |
|
"grad_norm": 0.3946473002433777, |
|
"learning_rate": 2.8702190195352148e-05, |
|
"loss": 0.0675, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.14434055754233271, |
|
"grad_norm": 0.1286439746618271, |
|
"learning_rate": 2.869497452869671e-05, |
|
"loss": 0.0755, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 0.1450586697689115, |
|
"grad_norm": 0.04242817312479019, |
|
"learning_rate": 2.8687758862041265e-05, |
|
"loss": 0.0368, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.14577678199549027, |
|
"grad_norm": 0.1294609159231186, |
|
"learning_rate": 2.868054319538582e-05, |
|
"loss": 0.0679, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 0.14649489422206902, |
|
"grad_norm": 0.0815710574388504, |
|
"learning_rate": 2.8673327528730382e-05, |
|
"loss": 0.0737, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.1472130064486478, |
|
"grad_norm": 0.04006512463092804, |
|
"learning_rate": 2.8666111862074938e-05, |
|
"loss": 0.05, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 0.14793111867522657, |
|
"grad_norm": 0.015738453716039658, |
|
"learning_rate": 2.8658896195419493e-05, |
|
"loss": 0.0629, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.14864923090180535, |
|
"grad_norm": 0.045333586633205414, |
|
"learning_rate": 2.8651680528764055e-05, |
|
"loss": 0.0534, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 0.1493673431283841, |
|
"grad_norm": 0.06222152337431908, |
|
"learning_rate": 2.864446486210861e-05, |
|
"loss": 0.0703, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.15008545535496287, |
|
"grad_norm": 0.0591827891767025, |
|
"learning_rate": 2.863724919545317e-05, |
|
"loss": 0.0499, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 0.15080356758154165, |
|
"grad_norm": 1.1121820211410522, |
|
"learning_rate": 2.8630033528797728e-05, |
|
"loss": 0.3427, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.15152167980812042, |
|
"grad_norm": 0.1692652404308319, |
|
"learning_rate": 2.8622962175475394e-05, |
|
"loss": 0.1411, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 0.15223979203469917, |
|
"grad_norm": 0.5019534826278687, |
|
"learning_rate": 2.861574650881995e-05, |
|
"loss": 0.085, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.15295790426127795, |
|
"grad_norm": 1.0281404256820679, |
|
"learning_rate": 2.860853084216451e-05, |
|
"loss": 0.1386, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 0.15367601648785673, |
|
"grad_norm": 0.048874229192733765, |
|
"learning_rate": 2.8601315175509067e-05, |
|
"loss": 0.0843, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.1543941287144355, |
|
"grad_norm": 2.0028481483459473, |
|
"learning_rate": 2.8594099508853622e-05, |
|
"loss": 0.0657, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 0.15511224094101425, |
|
"grad_norm": 0.1379271149635315, |
|
"learning_rate": 2.8586883842198184e-05, |
|
"loss": 0.0684, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.15583035316759303, |
|
"grad_norm": 0.08114618808031082, |
|
"learning_rate": 2.857966817554274e-05, |
|
"loss": 0.0667, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 0.1565484653941718, |
|
"grad_norm": 1.392622709274292, |
|
"learning_rate": 2.8572452508887295e-05, |
|
"loss": 0.0688, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.15726657762075058, |
|
"grad_norm": 11.604351997375488, |
|
"learning_rate": 2.8565236842231857e-05, |
|
"loss": 0.0882, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 0.15798468984732933, |
|
"grad_norm": 0.31855666637420654, |
|
"learning_rate": 2.8558021175576412e-05, |
|
"loss": 0.1015, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.1587028020739081, |
|
"grad_norm": 1.6662945747375488, |
|
"learning_rate": 2.8550805508920967e-05, |
|
"loss": 0.1386, |
|
"step": 11050 |
|
}, |
|
{ |
|
"epoch": 0.15942091430048688, |
|
"grad_norm": 13.3506498336792, |
|
"learning_rate": 2.854358984226553e-05, |
|
"loss": 0.0856, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.16013902652706566, |
|
"grad_norm": 1.4275833368301392, |
|
"learning_rate": 2.8536374175610085e-05, |
|
"loss": 0.0537, |
|
"step": 11150 |
|
}, |
|
{ |
|
"epoch": 0.1608571387536444, |
|
"grad_norm": 0.6070023775100708, |
|
"learning_rate": 2.852915850895464e-05, |
|
"loss": 0.0396, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.16157525098022318, |
|
"grad_norm": 0.4954681694507599, |
|
"learning_rate": 2.8521942842299202e-05, |
|
"loss": 0.0684, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 0.16229336320680196, |
|
"grad_norm": 0.03993278741836548, |
|
"learning_rate": 2.8514727175643758e-05, |
|
"loss": 0.0556, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.16301147543338074, |
|
"grad_norm": 0.039236683398485184, |
|
"learning_rate": 2.8507511508988316e-05, |
|
"loss": 0.066, |
|
"step": 11350 |
|
}, |
|
{ |
|
"epoch": 0.16372958765995949, |
|
"grad_norm": 0.8015234470367432, |
|
"learning_rate": 2.8500295842332875e-05, |
|
"loss": 0.0636, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.16444769988653826, |
|
"grad_norm": 1.8648189306259155, |
|
"learning_rate": 2.849308017567743e-05, |
|
"loss": 0.0991, |
|
"step": 11450 |
|
}, |
|
{ |
|
"epoch": 0.16516581211311704, |
|
"grad_norm": 0.09908437728881836, |
|
"learning_rate": 2.848586450902199e-05, |
|
"loss": 0.0537, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.16588392433969582, |
|
"grad_norm": 2.209059000015259, |
|
"learning_rate": 2.8478648842366548e-05, |
|
"loss": 0.0793, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 0.16660203656627456, |
|
"grad_norm": 0.09478212147951126, |
|
"learning_rate": 2.8471433175711103e-05, |
|
"loss": 0.0658, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.16732014879285334, |
|
"grad_norm": 2.0855391025543213, |
|
"learning_rate": 2.8464217509055662e-05, |
|
"loss": 0.0647, |
|
"step": 11650 |
|
}, |
|
{ |
|
"epoch": 0.16803826101943212, |
|
"grad_norm": 0.08406183868646622, |
|
"learning_rate": 2.845700184240022e-05, |
|
"loss": 0.0601, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.1687563732460109, |
|
"grad_norm": 0.054508406668901443, |
|
"learning_rate": 2.8449786175744776e-05, |
|
"loss": 0.0527, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 0.16947448547258967, |
|
"grad_norm": 0.22775687277317047, |
|
"learning_rate": 2.8442570509089335e-05, |
|
"loss": 0.0712, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.17019259769916842, |
|
"grad_norm": 0.04549324885010719, |
|
"learning_rate": 2.8435354842433893e-05, |
|
"loss": 0.058, |
|
"step": 11850 |
|
}, |
|
{ |
|
"epoch": 0.1709107099257472, |
|
"grad_norm": 0.05884317681193352, |
|
"learning_rate": 2.842813917577845e-05, |
|
"loss": 0.0817, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.17162882215232597, |
|
"grad_norm": 0.07801628112792969, |
|
"learning_rate": 2.8420923509123008e-05, |
|
"loss": 0.0365, |
|
"step": 11950 |
|
}, |
|
{ |
|
"epoch": 0.17234693437890475, |
|
"grad_norm": 1.7730413675308228, |
|
"learning_rate": 2.8413707842467566e-05, |
|
"loss": 0.0913, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.1730650466054835, |
|
"grad_norm": 0.054094549268484116, |
|
"learning_rate": 2.8406492175812125e-05, |
|
"loss": 0.0477, |
|
"step": 12050 |
|
}, |
|
{ |
|
"epoch": 0.17378315883206227, |
|
"grad_norm": 0.9634159207344055, |
|
"learning_rate": 2.839927650915668e-05, |
|
"loss": 0.0455, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.17450127105864105, |
|
"grad_norm": 0.09357574582099915, |
|
"learning_rate": 2.839206084250124e-05, |
|
"loss": 0.0636, |
|
"step": 12150 |
|
}, |
|
{ |
|
"epoch": 0.17521938328521983, |
|
"grad_norm": 0.09306718409061432, |
|
"learning_rate": 2.8384845175845798e-05, |
|
"loss": 0.0541, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.17593749551179858, |
|
"grad_norm": 0.09160356223583221, |
|
"learning_rate": 2.8377629509190356e-05, |
|
"loss": 0.0678, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 0.17665560773837735, |
|
"grad_norm": 0.04214440658688545, |
|
"learning_rate": 2.8370413842534912e-05, |
|
"loss": 0.0371, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.17737371996495613, |
|
"grad_norm": 11.02153491973877, |
|
"learning_rate": 2.836319817587947e-05, |
|
"loss": 0.0747, |
|
"step": 12350 |
|
}, |
|
{ |
|
"epoch": 0.1780918321915349, |
|
"grad_norm": 0.14870058000087738, |
|
"learning_rate": 2.835598250922403e-05, |
|
"loss": 0.1061, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.17880994441811365, |
|
"grad_norm": 0.07340437173843384, |
|
"learning_rate": 2.8348766842568585e-05, |
|
"loss": 0.0835, |
|
"step": 12450 |
|
}, |
|
{ |
|
"epoch": 0.17952805664469243, |
|
"grad_norm": 0.13345997035503387, |
|
"learning_rate": 2.8341551175913143e-05, |
|
"loss": 0.0566, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.1802461688712712, |
|
"grad_norm": 0.1167718842625618, |
|
"learning_rate": 2.8334335509257702e-05, |
|
"loss": 0.0517, |
|
"step": 12550 |
|
}, |
|
{ |
|
"epoch": 0.18096428109784998, |
|
"grad_norm": 1.0248634815216064, |
|
"learning_rate": 2.832711984260226e-05, |
|
"loss": 0.0857, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.18168239332442873, |
|
"grad_norm": 0.7217876315116882, |
|
"learning_rate": 2.8319904175946816e-05, |
|
"loss": 0.0621, |
|
"step": 12650 |
|
}, |
|
{ |
|
"epoch": 0.1824005055510075, |
|
"grad_norm": 0.13123983144760132, |
|
"learning_rate": 2.8312688509291375e-05, |
|
"loss": 0.047, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 0.18311861777758628, |
|
"grad_norm": 0.12581509351730347, |
|
"learning_rate": 2.8305472842635934e-05, |
|
"loss": 0.0487, |
|
"step": 12750 |
|
}, |
|
{ |
|
"epoch": 0.18383673000416506, |
|
"grad_norm": 0.11207035183906555, |
|
"learning_rate": 2.829825717598049e-05, |
|
"loss": 0.0846, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.1845548422307438, |
|
"grad_norm": 0.146351158618927, |
|
"learning_rate": 2.8291041509325048e-05, |
|
"loss": 0.0541, |
|
"step": 12850 |
|
}, |
|
{ |
|
"epoch": 0.1852729544573226, |
|
"grad_norm": 0.10366562008857727, |
|
"learning_rate": 2.8283970156002713e-05, |
|
"loss": 0.0882, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.18599106668390136, |
|
"grad_norm": 0.032089706510305405, |
|
"learning_rate": 2.8276754489347272e-05, |
|
"loss": 0.0591, |
|
"step": 12950 |
|
}, |
|
{ |
|
"epoch": 0.18670917891048014, |
|
"grad_norm": 0.4299641251564026, |
|
"learning_rate": 2.826953882269183e-05, |
|
"loss": 0.0877, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.1874272911370589, |
|
"grad_norm": 0.31984642148017883, |
|
"learning_rate": 2.8262323156036386e-05, |
|
"loss": 0.1061, |
|
"step": 13050 |
|
}, |
|
{ |
|
"epoch": 0.18814540336363766, |
|
"grad_norm": 0.21333181858062744, |
|
"learning_rate": 2.8255107489380945e-05, |
|
"loss": 0.0754, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 0.18886351559021644, |
|
"grad_norm": 44.345706939697266, |
|
"learning_rate": 2.8247891822725504e-05, |
|
"loss": 0.0724, |
|
"step": 13150 |
|
}, |
|
{ |
|
"epoch": 0.18958162781679522, |
|
"grad_norm": 18.202777862548828, |
|
"learning_rate": 2.824067615607006e-05, |
|
"loss": 0.0707, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.19029974004337397, |
|
"grad_norm": 0.09870504587888718, |
|
"learning_rate": 2.8233460489414618e-05, |
|
"loss": 0.0385, |
|
"step": 13250 |
|
}, |
|
{ |
|
"epoch": 0.19101785226995274, |
|
"grad_norm": 0.06396225094795227, |
|
"learning_rate": 2.8226244822759176e-05, |
|
"loss": 0.0695, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 0.19173596449653152, |
|
"grad_norm": 0.07328931987285614, |
|
"learning_rate": 2.8219029156103732e-05, |
|
"loss": 0.052, |
|
"step": 13350 |
|
}, |
|
{ |
|
"epoch": 0.1924540767231103, |
|
"grad_norm": 0.0672977939248085, |
|
"learning_rate": 2.821181348944829e-05, |
|
"loss": 0.0757, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.19317218894968904, |
|
"grad_norm": 0.09179489314556122, |
|
"learning_rate": 2.820459782279285e-05, |
|
"loss": 0.0716, |
|
"step": 13450 |
|
}, |
|
{ |
|
"epoch": 0.19389030117626782, |
|
"grad_norm": 0.11543264240026474, |
|
"learning_rate": 2.8197382156137408e-05, |
|
"loss": 0.1018, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.1946084134028466, |
|
"grad_norm": 1.2652478218078613, |
|
"learning_rate": 2.8190166489481963e-05, |
|
"loss": 0.0646, |
|
"step": 13550 |
|
}, |
|
{ |
|
"epoch": 0.19532652562942537, |
|
"grad_norm": 0.10677991062402725, |
|
"learning_rate": 2.8182950822826522e-05, |
|
"loss": 0.0634, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.19604463785600412, |
|
"grad_norm": 0.09842602908611298, |
|
"learning_rate": 2.817573515617108e-05, |
|
"loss": 0.0389, |
|
"step": 13650 |
|
}, |
|
{ |
|
"epoch": 0.1967627500825829, |
|
"grad_norm": 0.07868483662605286, |
|
"learning_rate": 2.8168519489515636e-05, |
|
"loss": 0.056, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 0.19748086230916168, |
|
"grad_norm": 0.10997475683689117, |
|
"learning_rate": 2.8161303822860195e-05, |
|
"loss": 0.0769, |
|
"step": 13750 |
|
}, |
|
{ |
|
"epoch": 0.19819897453574045, |
|
"grad_norm": 5.006385326385498, |
|
"learning_rate": 2.8154088156204753e-05, |
|
"loss": 0.0778, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.19891708676231923, |
|
"grad_norm": 5.939600944519043, |
|
"learning_rate": 2.814687248954931e-05, |
|
"loss": 0.0406, |
|
"step": 13850 |
|
}, |
|
{ |
|
"epoch": 0.19963519898889798, |
|
"grad_norm": 1.339119553565979, |
|
"learning_rate": 2.8139656822893868e-05, |
|
"loss": 0.0715, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 0.20035331121547675, |
|
"grad_norm": 1.5083401203155518, |
|
"learning_rate": 2.8132441156238426e-05, |
|
"loss": 0.0735, |
|
"step": 13950 |
|
}, |
|
{ |
|
"epoch": 0.20107142344205553, |
|
"grad_norm": 1.6835660934448242, |
|
"learning_rate": 2.812522548958298e-05, |
|
"loss": 0.0568, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.2017895356686343, |
|
"grad_norm": 0.1471836119890213, |
|
"learning_rate": 2.811800982292754e-05, |
|
"loss": 0.0496, |
|
"step": 14050 |
|
}, |
|
{ |
|
"epoch": 0.20250764789521306, |
|
"grad_norm": 0.06911035627126694, |
|
"learning_rate": 2.81107941562721e-05, |
|
"loss": 0.0588, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 0.20322576012179183, |
|
"grad_norm": 0.05670926347374916, |
|
"learning_rate": 2.8103578489616654e-05, |
|
"loss": 0.0435, |
|
"step": 14150 |
|
}, |
|
{ |
|
"epoch": 0.2039438723483706, |
|
"grad_norm": 20.66596031188965, |
|
"learning_rate": 2.8096362822961216e-05, |
|
"loss": 0.056, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 0.20466198457494938, |
|
"grad_norm": 2.9100301265716553, |
|
"learning_rate": 2.8089147156305772e-05, |
|
"loss": 0.0737, |
|
"step": 14250 |
|
}, |
|
{ |
|
"epoch": 0.20538009680152813, |
|
"grad_norm": 0.05886685848236084, |
|
"learning_rate": 2.8081931489650327e-05, |
|
"loss": 0.06, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 0.2060982090281069, |
|
"grad_norm": 2.182786464691162, |
|
"learning_rate": 2.807471582299489e-05, |
|
"loss": 0.0594, |
|
"step": 14350 |
|
}, |
|
{ |
|
"epoch": 0.2068163212546857, |
|
"grad_norm": 0.10666311532258987, |
|
"learning_rate": 2.8067500156339445e-05, |
|
"loss": 0.0562, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.20753443348126446, |
|
"grad_norm": 1.5818663835525513, |
|
"learning_rate": 2.8060284489684003e-05, |
|
"loss": 0.0383, |
|
"step": 14450 |
|
}, |
|
{ |
|
"epoch": 0.2082525457078432, |
|
"grad_norm": 0.05999474972486496, |
|
"learning_rate": 2.8053068823028562e-05, |
|
"loss": 0.05, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.208970657934422, |
|
"grad_norm": 0.09610898047685623, |
|
"learning_rate": 2.8045853156373117e-05, |
|
"loss": 0.053, |
|
"step": 14550 |
|
}, |
|
{ |
|
"epoch": 0.20968877016100076, |
|
"grad_norm": 0.11906774342060089, |
|
"learning_rate": 2.8038637489717676e-05, |
|
"loss": 0.0535, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 0.21040688238757954, |
|
"grad_norm": 0.033821843564510345, |
|
"learning_rate": 2.8031421823062235e-05, |
|
"loss": 0.027, |
|
"step": 14650 |
|
}, |
|
{ |
|
"epoch": 0.2111249946141583, |
|
"grad_norm": 0.06107163429260254, |
|
"learning_rate": 2.802420615640679e-05, |
|
"loss": 0.0901, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 0.21184310684073707, |
|
"grad_norm": 0.04277655854821205, |
|
"learning_rate": 2.801699048975135e-05, |
|
"loss": 0.0664, |
|
"step": 14750 |
|
}, |
|
{ |
|
"epoch": 0.21256121906731584, |
|
"grad_norm": 0.07864125818014145, |
|
"learning_rate": 2.8009774823095908e-05, |
|
"loss": 0.0374, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 0.21327933129389462, |
|
"grad_norm": 19.828433990478516, |
|
"learning_rate": 2.8002559156440463e-05, |
|
"loss": 0.1275, |
|
"step": 14850 |
|
}, |
|
{ |
|
"epoch": 0.21399744352047337, |
|
"grad_norm": 0.11536970734596252, |
|
"learning_rate": 2.7995343489785025e-05, |
|
"loss": 0.0673, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 0.21471555574705214, |
|
"grad_norm": 0.06297395378351212, |
|
"learning_rate": 2.798812782312958e-05, |
|
"loss": 0.0551, |
|
"step": 14950 |
|
}, |
|
{ |
|
"epoch": 0.21543366797363092, |
|
"grad_norm": 0.1182003766298294, |
|
"learning_rate": 2.7980912156474136e-05, |
|
"loss": 0.0692, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.2161517802002097, |
|
"grad_norm": 0.43064287304878235, |
|
"learning_rate": 2.7973696489818698e-05, |
|
"loss": 0.08, |
|
"step": 15050 |
|
}, |
|
{ |
|
"epoch": 0.21686989242678845, |
|
"grad_norm": 0.07606443017721176, |
|
"learning_rate": 2.7966480823163253e-05, |
|
"loss": 0.0388, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 0.21758800465336722, |
|
"grad_norm": 12.470568656921387, |
|
"learning_rate": 2.795926515650781e-05, |
|
"loss": 0.0426, |
|
"step": 15150 |
|
}, |
|
{ |
|
"epoch": 0.218306116879946, |
|
"grad_norm": 1.1158839464187622, |
|
"learning_rate": 2.795204948985237e-05, |
|
"loss": 0.0722, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 0.21902422910652478, |
|
"grad_norm": 0.1124221459031105, |
|
"learning_rate": 2.7944833823196926e-05, |
|
"loss": 0.0454, |
|
"step": 15250 |
|
}, |
|
{ |
|
"epoch": 0.21974234133310352, |
|
"grad_norm": 0.050924718379974365, |
|
"learning_rate": 2.793761815654148e-05, |
|
"loss": 0.0437, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 0.2204604535596823, |
|
"grad_norm": 0.09370733797550201, |
|
"learning_rate": 2.7930402489886043e-05, |
|
"loss": 0.0514, |
|
"step": 15350 |
|
}, |
|
{ |
|
"epoch": 0.22117856578626108, |
|
"grad_norm": 0.11523021012544632, |
|
"learning_rate": 2.79231868232306e-05, |
|
"loss": 0.0588, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 0.22189667801283985, |
|
"grad_norm": 0.0939398780465126, |
|
"learning_rate": 2.7915971156575154e-05, |
|
"loss": 0.0585, |
|
"step": 15450 |
|
}, |
|
{ |
|
"epoch": 0.2226147902394186, |
|
"grad_norm": 0.049065787345170975, |
|
"learning_rate": 2.7908755489919716e-05, |
|
"loss": 0.0404, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.22333290246599738, |
|
"grad_norm": 0.03415745124220848, |
|
"learning_rate": 2.790153982326427e-05, |
|
"loss": 0.0536, |
|
"step": 15550 |
|
}, |
|
{ |
|
"epoch": 0.22405101469257616, |
|
"grad_norm": 0.16649800539016724, |
|
"learning_rate": 2.789432415660883e-05, |
|
"loss": 0.0802, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 0.22476912691915493, |
|
"grad_norm": 0.0649518072605133, |
|
"learning_rate": 2.788710848995339e-05, |
|
"loss": 0.0387, |
|
"step": 15650 |
|
}, |
|
{ |
|
"epoch": 0.2254872391457337, |
|
"grad_norm": 0.03974470496177673, |
|
"learning_rate": 2.7879892823297944e-05, |
|
"loss": 0.0407, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 0.22620535137231246, |
|
"grad_norm": 0.08658800274133682, |
|
"learning_rate": 2.7872677156642503e-05, |
|
"loss": 0.0486, |
|
"step": 15750 |
|
}, |
|
{ |
|
"epoch": 0.22692346359889123, |
|
"grad_norm": 0.03997417911887169, |
|
"learning_rate": 2.7865461489987062e-05, |
|
"loss": 0.039, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 0.22764157582547, |
|
"grad_norm": 0.44339972734451294, |
|
"learning_rate": 2.7858245823331617e-05, |
|
"loss": 0.082, |
|
"step": 15850 |
|
}, |
|
{ |
|
"epoch": 0.2283596880520488, |
|
"grad_norm": 0.06587305665016174, |
|
"learning_rate": 2.7851030156676176e-05, |
|
"loss": 0.0627, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 0.22907780027862754, |
|
"grad_norm": 45.02585983276367, |
|
"learning_rate": 2.7843814490020734e-05, |
|
"loss": 0.0865, |
|
"step": 15950 |
|
}, |
|
{ |
|
"epoch": 0.2297959125052063, |
|
"grad_norm": 0.046719033271074295, |
|
"learning_rate": 2.783659882336529e-05, |
|
"loss": 0.0566, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.2305140247317851, |
|
"grad_norm": 0.06561392545700073, |
|
"learning_rate": 2.7829527470042956e-05, |
|
"loss": 0.0639, |
|
"step": 16050 |
|
}, |
|
{ |
|
"epoch": 0.23123213695836387, |
|
"grad_norm": 0.04073488339781761, |
|
"learning_rate": 2.7822311803387518e-05, |
|
"loss": 0.0421, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 0.23195024918494261, |
|
"grad_norm": 0.03125346079468727, |
|
"learning_rate": 2.7815096136732073e-05, |
|
"loss": 0.0702, |
|
"step": 16150 |
|
}, |
|
{ |
|
"epoch": 0.2326683614115214, |
|
"grad_norm": 0.1020875945687294, |
|
"learning_rate": 2.780788047007663e-05, |
|
"loss": 0.0601, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 0.23338647363810017, |
|
"grad_norm": 0.05583386868238449, |
|
"learning_rate": 2.780066480342119e-05, |
|
"loss": 0.0384, |
|
"step": 16250 |
|
}, |
|
{ |
|
"epoch": 0.23410458586467894, |
|
"grad_norm": 0.5046035647392273, |
|
"learning_rate": 2.7793449136765746e-05, |
|
"loss": 0.0562, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 0.2348226980912577, |
|
"grad_norm": 0.03745768964290619, |
|
"learning_rate": 2.77862334701103e-05, |
|
"loss": 0.0518, |
|
"step": 16350 |
|
}, |
|
{ |
|
"epoch": 0.23554081031783647, |
|
"grad_norm": 4.189239501953125, |
|
"learning_rate": 2.7779017803454863e-05, |
|
"loss": 0.0638, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 0.23625892254441525, |
|
"grad_norm": 5.748262405395508, |
|
"learning_rate": 2.777180213679942e-05, |
|
"loss": 0.08, |
|
"step": 16450 |
|
}, |
|
{ |
|
"epoch": 0.23697703477099402, |
|
"grad_norm": 0.13202162086963654, |
|
"learning_rate": 2.7764586470143977e-05, |
|
"loss": 0.0986, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.23769514699757277, |
|
"grad_norm": 0.22944556176662445, |
|
"learning_rate": 2.7757370803488536e-05, |
|
"loss": 0.0593, |
|
"step": 16550 |
|
}, |
|
{ |
|
"epoch": 0.23841325922415155, |
|
"grad_norm": 0.041575804352760315, |
|
"learning_rate": 2.775015513683309e-05, |
|
"loss": 0.0316, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 0.23913137145073032, |
|
"grad_norm": 0.5464422702789307, |
|
"learning_rate": 2.774293947017765e-05, |
|
"loss": 0.049, |
|
"step": 16650 |
|
}, |
|
{ |
|
"epoch": 0.2398494836773091, |
|
"grad_norm": 0.057239070534706116, |
|
"learning_rate": 2.773572380352221e-05, |
|
"loss": 0.0785, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 0.24056759590388785, |
|
"grad_norm": 0.04813413694500923, |
|
"learning_rate": 2.7728508136866764e-05, |
|
"loss": 0.0426, |
|
"step": 16750 |
|
}, |
|
{ |
|
"epoch": 0.24128570813046663, |
|
"grad_norm": 0.21200266480445862, |
|
"learning_rate": 2.7721292470211326e-05, |
|
"loss": 0.0563, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 0.2420038203570454, |
|
"grad_norm": 0.054479002952575684, |
|
"learning_rate": 2.771407680355588e-05, |
|
"loss": 0.0372, |
|
"step": 16850 |
|
}, |
|
{ |
|
"epoch": 0.24272193258362418, |
|
"grad_norm": 0.07375594973564148, |
|
"learning_rate": 2.7706861136900437e-05, |
|
"loss": 0.0832, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 0.24344004481020293, |
|
"grad_norm": 0.0346427857875824, |
|
"learning_rate": 2.7699645470245e-05, |
|
"loss": 0.0324, |
|
"step": 16950 |
|
}, |
|
{ |
|
"epoch": 0.2441581570367817, |
|
"grad_norm": 0.027668170630931854, |
|
"learning_rate": 2.7692429803589554e-05, |
|
"loss": 0.0286, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.24487626926336048, |
|
"grad_norm": 0.16506126523017883, |
|
"learning_rate": 2.768521413693411e-05, |
|
"loss": 0.0454, |
|
"step": 17050 |
|
}, |
|
{ |
|
"epoch": 0.24559438148993926, |
|
"grad_norm": 0.14680777490139008, |
|
"learning_rate": 2.7677998470278672e-05, |
|
"loss": 0.0487, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 0.246312493716518, |
|
"grad_norm": 1.6522406339645386, |
|
"learning_rate": 2.7670782803623227e-05, |
|
"loss": 0.0592, |
|
"step": 17150 |
|
}, |
|
{ |
|
"epoch": 0.24703060594309678, |
|
"grad_norm": 0.020861970260739326, |
|
"learning_rate": 2.7663567136967786e-05, |
|
"loss": 0.0501, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 0.24774871816967556, |
|
"grad_norm": 0.17236000299453735, |
|
"learning_rate": 2.7656351470312345e-05, |
|
"loss": 0.0811, |
|
"step": 17250 |
|
}, |
|
{ |
|
"epoch": 0.24846683039625433, |
|
"grad_norm": 0.1144992858171463, |
|
"learning_rate": 2.76491358036569e-05, |
|
"loss": 0.0692, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 0.24918494262283308, |
|
"grad_norm": 0.1321888267993927, |
|
"learning_rate": 2.764192013700146e-05, |
|
"loss": 0.0439, |
|
"step": 17350 |
|
}, |
|
{ |
|
"epoch": 0.24990305484941186, |
|
"grad_norm": 0.07972278445959091, |
|
"learning_rate": 2.7634704470346017e-05, |
|
"loss": 0.0479, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 0.25062116707599064, |
|
"grad_norm": 0.029969004914164543, |
|
"learning_rate": 2.7627488803690573e-05, |
|
"loss": 0.0422, |
|
"step": 17450 |
|
}, |
|
{ |
|
"epoch": 0.2513392793025694, |
|
"grad_norm": 0.08719193935394287, |
|
"learning_rate": 2.762027313703513e-05, |
|
"loss": 0.0688, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.2520573915291482, |
|
"grad_norm": 1.5696977376937866, |
|
"learning_rate": 2.761305747037969e-05, |
|
"loss": 0.0543, |
|
"step": 17550 |
|
}, |
|
{ |
|
"epoch": 0.25277550375572694, |
|
"grad_norm": 0.03598424419760704, |
|
"learning_rate": 2.7605841803724246e-05, |
|
"loss": 0.0468, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 0.25349361598230574, |
|
"grad_norm": 0.0753965675830841, |
|
"learning_rate": 2.7598626137068804e-05, |
|
"loss": 0.0401, |
|
"step": 17650 |
|
}, |
|
{ |
|
"epoch": 0.2542117282088845, |
|
"grad_norm": 0.06296613067388535, |
|
"learning_rate": 2.7591410470413363e-05, |
|
"loss": 0.0523, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 0.25492984043546324, |
|
"grad_norm": 0.10784654319286346, |
|
"learning_rate": 2.7584194803757918e-05, |
|
"loss": 0.0495, |
|
"step": 17750 |
|
}, |
|
{ |
|
"epoch": 0.25564795266204204, |
|
"grad_norm": 14.075483322143555, |
|
"learning_rate": 2.7576979137102477e-05, |
|
"loss": 0.0783, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 0.2563660648886208, |
|
"grad_norm": 0.05900178104639053, |
|
"learning_rate": 2.7569763470447036e-05, |
|
"loss": 0.0306, |
|
"step": 17850 |
|
}, |
|
{ |
|
"epoch": 0.25708417711519954, |
|
"grad_norm": 0.07077598571777344, |
|
"learning_rate": 2.7562547803791594e-05, |
|
"loss": 0.0474, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 0.25780228934177835, |
|
"grad_norm": 0.035241879522800446, |
|
"learning_rate": 2.755533213713615e-05, |
|
"loss": 0.0348, |
|
"step": 17950 |
|
}, |
|
{ |
|
"epoch": 0.2585204015683571, |
|
"grad_norm": 0.031246617436408997, |
|
"learning_rate": 2.754811647048071e-05, |
|
"loss": 0.0335, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.2592385137949359, |
|
"grad_norm": 0.01713900826871395, |
|
"learning_rate": 2.7540900803825267e-05, |
|
"loss": 0.0395, |
|
"step": 18050 |
|
}, |
|
{ |
|
"epoch": 0.25995662602151465, |
|
"grad_norm": 27.746681213378906, |
|
"learning_rate": 2.7533685137169823e-05, |
|
"loss": 0.0682, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 0.2606747382480934, |
|
"grad_norm": 0.17817170917987823, |
|
"learning_rate": 2.752646947051438e-05, |
|
"loss": 0.0677, |
|
"step": 18150 |
|
}, |
|
{ |
|
"epoch": 0.2613928504746722, |
|
"grad_norm": 0.026913467794656754, |
|
"learning_rate": 2.751925380385894e-05, |
|
"loss": 0.0458, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 0.26211096270125095, |
|
"grad_norm": 1.5317715406417847, |
|
"learning_rate": 2.7512038137203495e-05, |
|
"loss": 0.062, |
|
"step": 18250 |
|
}, |
|
{ |
|
"epoch": 0.2628290749278297, |
|
"grad_norm": 0.04229836165904999, |
|
"learning_rate": 2.7504822470548054e-05, |
|
"loss": 0.0408, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 0.2635471871544085, |
|
"grad_norm": 0.021231146529316902, |
|
"learning_rate": 2.7497606803892613e-05, |
|
"loss": 0.0425, |
|
"step": 18350 |
|
}, |
|
{ |
|
"epoch": 0.26426529938098725, |
|
"grad_norm": 1.7055615186691284, |
|
"learning_rate": 2.7490391137237168e-05, |
|
"loss": 0.055, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 0.26498341160756606, |
|
"grad_norm": 0.1902083307504654, |
|
"learning_rate": 2.7483175470581727e-05, |
|
"loss": 0.0416, |
|
"step": 18450 |
|
}, |
|
{ |
|
"epoch": 0.2657015238341448, |
|
"grad_norm": 0.016266852617263794, |
|
"learning_rate": 2.7475959803926286e-05, |
|
"loss": 0.0543, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.26641963606072355, |
|
"grad_norm": 19.51447868347168, |
|
"learning_rate": 2.746874413727084e-05, |
|
"loss": 0.0517, |
|
"step": 18550 |
|
}, |
|
{ |
|
"epoch": 0.26713774828730236, |
|
"grad_norm": 0.012048796750605106, |
|
"learning_rate": 2.7461528470615403e-05, |
|
"loss": 0.021, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 0.2678558605138811, |
|
"grad_norm": 1.3456578254699707, |
|
"learning_rate": 2.745431280395996e-05, |
|
"loss": 0.0766, |
|
"step": 18650 |
|
}, |
|
{ |
|
"epoch": 0.26857397274045985, |
|
"grad_norm": 5.304058074951172, |
|
"learning_rate": 2.7447097137304517e-05, |
|
"loss": 0.1094, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 0.26929208496703866, |
|
"grad_norm": 0.19321753084659576, |
|
"learning_rate": 2.7439881470649076e-05, |
|
"loss": 0.0706, |
|
"step": 18750 |
|
}, |
|
{ |
|
"epoch": 0.2700101971936174, |
|
"grad_norm": 0.23943820595741272, |
|
"learning_rate": 2.743266580399363e-05, |
|
"loss": 0.1026, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 0.2707283094201962, |
|
"grad_norm": 0.08400757610797882, |
|
"learning_rate": 2.742545013733819e-05, |
|
"loss": 0.0598, |
|
"step": 18850 |
|
}, |
|
{ |
|
"epoch": 0.27144642164677496, |
|
"grad_norm": 0.2430121749639511, |
|
"learning_rate": 2.741823447068275e-05, |
|
"loss": 0.0693, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 0.2721645338733537, |
|
"grad_norm": 0.06597993522882462, |
|
"learning_rate": 2.7411163117360414e-05, |
|
"loss": 0.0384, |
|
"step": 18950 |
|
}, |
|
{ |
|
"epoch": 0.2728826460999325, |
|
"grad_norm": 7.39203405380249, |
|
"learning_rate": 2.7403947450704973e-05, |
|
"loss": 0.0541, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.27360075832651126, |
|
"grad_norm": 0.1847894936800003, |
|
"learning_rate": 2.739673178404953e-05, |
|
"loss": 0.0606, |
|
"step": 19050 |
|
}, |
|
{ |
|
"epoch": 0.27431887055309, |
|
"grad_norm": 0.03135521709918976, |
|
"learning_rate": 2.7389516117394087e-05, |
|
"loss": 0.042, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 0.2750369827796688, |
|
"grad_norm": 1.6055335998535156, |
|
"learning_rate": 2.7382300450738646e-05, |
|
"loss": 0.0428, |
|
"step": 19150 |
|
}, |
|
{ |
|
"epoch": 0.27575509500624756, |
|
"grad_norm": 0.15340352058410645, |
|
"learning_rate": 2.73750847840832e-05, |
|
"loss": 0.0712, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 0.27647320723282637, |
|
"grad_norm": 0.027920212596654892, |
|
"learning_rate": 2.736786911742776e-05, |
|
"loss": 0.0361, |
|
"step": 19250 |
|
}, |
|
{ |
|
"epoch": 0.2771913194594051, |
|
"grad_norm": 0.21404047310352325, |
|
"learning_rate": 2.736065345077232e-05, |
|
"loss": 0.0574, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 0.27790943168598387, |
|
"grad_norm": 0.02065029740333557, |
|
"learning_rate": 2.7353437784116874e-05, |
|
"loss": 0.0331, |
|
"step": 19350 |
|
}, |
|
{ |
|
"epoch": 0.27862754391256267, |
|
"grad_norm": 6.572165012359619, |
|
"learning_rate": 2.7346222117461433e-05, |
|
"loss": 0.049, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 0.2793456561391414, |
|
"grad_norm": 0.03197706490755081, |
|
"learning_rate": 2.733900645080599e-05, |
|
"loss": 0.0336, |
|
"step": 19450 |
|
}, |
|
{ |
|
"epoch": 0.28006376836572017, |
|
"grad_norm": 1.9417047500610352, |
|
"learning_rate": 2.733179078415055e-05, |
|
"loss": 0.0781, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.28078188059229897, |
|
"grad_norm": 0.07425859570503235, |
|
"learning_rate": 2.7324575117495106e-05, |
|
"loss": 0.0499, |
|
"step": 19550 |
|
}, |
|
{ |
|
"epoch": 0.2814999928188777, |
|
"grad_norm": 0.09280502051115036, |
|
"learning_rate": 2.7317359450839664e-05, |
|
"loss": 0.0583, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 0.2822181050454565, |
|
"grad_norm": 0.1167830228805542, |
|
"learning_rate": 2.7310143784184223e-05, |
|
"loss": 0.1167, |
|
"step": 19650 |
|
}, |
|
{ |
|
"epoch": 0.2829362172720353, |
|
"grad_norm": 7.878513813018799, |
|
"learning_rate": 2.7302928117528778e-05, |
|
"loss": 0.0615, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 0.283654329498614, |
|
"grad_norm": 0.05659603700041771, |
|
"learning_rate": 2.7295712450873337e-05, |
|
"loss": 0.0561, |
|
"step": 19750 |
|
}, |
|
{ |
|
"epoch": 0.2843724417251928, |
|
"grad_norm": 0.061973392963409424, |
|
"learning_rate": 2.7288496784217896e-05, |
|
"loss": 0.0736, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 0.2850905539517716, |
|
"grad_norm": 0.05980992689728737, |
|
"learning_rate": 2.728128111756245e-05, |
|
"loss": 0.0461, |
|
"step": 19850 |
|
}, |
|
{ |
|
"epoch": 0.2858086661783504, |
|
"grad_norm": 0.025946056470274925, |
|
"learning_rate": 2.727406545090701e-05, |
|
"loss": 0.0202, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 0.2865267784049291, |
|
"grad_norm": 0.024165382608771324, |
|
"learning_rate": 2.726684978425157e-05, |
|
"loss": 0.0674, |
|
"step": 19950 |
|
}, |
|
{ |
|
"epoch": 0.2872448906315079, |
|
"grad_norm": 0.09668976813554764, |
|
"learning_rate": 2.7259634117596124e-05, |
|
"loss": 0.0576, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.2879630028580867, |
|
"grad_norm": 0.11194832623004913, |
|
"learning_rate": 2.7252418450940683e-05, |
|
"loss": 0.035, |
|
"step": 20050 |
|
}, |
|
{ |
|
"epoch": 0.28868111508466543, |
|
"grad_norm": 0.024094650521874428, |
|
"learning_rate": 2.724520278428524e-05, |
|
"loss": 0.0435, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 0.2893992273112442, |
|
"grad_norm": 0.05443573743104935, |
|
"learning_rate": 2.7237987117629797e-05, |
|
"loss": 0.0706, |
|
"step": 20150 |
|
}, |
|
{ |
|
"epoch": 0.290117339537823, |
|
"grad_norm": 8.078572273254395, |
|
"learning_rate": 2.723077145097436e-05, |
|
"loss": 0.0559, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 0.29083545176440173, |
|
"grad_norm": 0.09734756499528885, |
|
"learning_rate": 2.7223555784318914e-05, |
|
"loss": 0.0895, |
|
"step": 20250 |
|
}, |
|
{ |
|
"epoch": 0.29155356399098054, |
|
"grad_norm": 0.12933404743671417, |
|
"learning_rate": 2.721634011766347e-05, |
|
"loss": 0.0383, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 0.2922716762175593, |
|
"grad_norm": 0.5858670473098755, |
|
"learning_rate": 2.720912445100803e-05, |
|
"loss": 0.0853, |
|
"step": 20350 |
|
}, |
|
{ |
|
"epoch": 0.29298978844413803, |
|
"grad_norm": 40.67034912109375, |
|
"learning_rate": 2.7201908784352587e-05, |
|
"loss": 0.0467, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 0.29370790067071684, |
|
"grad_norm": 9.080503463745117, |
|
"learning_rate": 2.7194693117697142e-05, |
|
"loss": 0.0588, |
|
"step": 20450 |
|
}, |
|
{ |
|
"epoch": 0.2944260128972956, |
|
"grad_norm": 13.065214157104492, |
|
"learning_rate": 2.7187477451041704e-05, |
|
"loss": 0.0269, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.29514412512387433, |
|
"grad_norm": 0.03507602587342262, |
|
"learning_rate": 2.718026178438626e-05, |
|
"loss": 0.0486, |
|
"step": 20550 |
|
}, |
|
{ |
|
"epoch": 0.29586223735045314, |
|
"grad_norm": 0.053962577134370804, |
|
"learning_rate": 2.7173046117730815e-05, |
|
"loss": 0.0704, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 0.2965803495770319, |
|
"grad_norm": 0.04702967032790184, |
|
"learning_rate": 2.7165830451075377e-05, |
|
"loss": 0.0503, |
|
"step": 20650 |
|
}, |
|
{ |
|
"epoch": 0.2972984618036107, |
|
"grad_norm": 0.04396852105855942, |
|
"learning_rate": 2.7158614784419932e-05, |
|
"loss": 0.0601, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 0.29801657403018944, |
|
"grad_norm": 0.053248144686222076, |
|
"learning_rate": 2.7151399117764488e-05, |
|
"loss": 0.048, |
|
"step": 20750 |
|
}, |
|
{ |
|
"epoch": 0.2987346862567682, |
|
"grad_norm": 0.03361482545733452, |
|
"learning_rate": 2.714418345110905e-05, |
|
"loss": 0.0641, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 0.299452798483347, |
|
"grad_norm": 0.05684931203722954, |
|
"learning_rate": 2.7137112097786716e-05, |
|
"loss": 0.0337, |
|
"step": 20850 |
|
}, |
|
{ |
|
"epoch": 0.30017091070992574, |
|
"grad_norm": 0.06214507669210434, |
|
"learning_rate": 2.712989643113127e-05, |
|
"loss": 0.0386, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 0.3008890229365045, |
|
"grad_norm": 0.04733401909470558, |
|
"learning_rate": 2.712268076447583e-05, |
|
"loss": 0.071, |
|
"step": 20950 |
|
}, |
|
{ |
|
"epoch": 0.3016071351630833, |
|
"grad_norm": 3.680715560913086, |
|
"learning_rate": 2.711546509782039e-05, |
|
"loss": 0.0499, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.30232524738966204, |
|
"grad_norm": 0.4213371276855469, |
|
"learning_rate": 2.7108249431164944e-05, |
|
"loss": 0.071, |
|
"step": 21050 |
|
}, |
|
{ |
|
"epoch": 0.30304335961624085, |
|
"grad_norm": 0.06743492186069489, |
|
"learning_rate": 2.7101033764509506e-05, |
|
"loss": 0.036, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 0.3037614718428196, |
|
"grad_norm": 3.1243784427642822, |
|
"learning_rate": 2.709381809785406e-05, |
|
"loss": 0.0787, |
|
"step": 21150 |
|
}, |
|
{ |
|
"epoch": 0.30447958406939835, |
|
"grad_norm": 13.193207740783691, |
|
"learning_rate": 2.7086602431198617e-05, |
|
"loss": 0.0572, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 0.30519769629597715, |
|
"grad_norm": 0.3255308270454407, |
|
"learning_rate": 2.707938676454318e-05, |
|
"loss": 0.0527, |
|
"step": 21250 |
|
}, |
|
{ |
|
"epoch": 0.3059158085225559, |
|
"grad_norm": 0.05115434527397156, |
|
"learning_rate": 2.7072171097887734e-05, |
|
"loss": 0.0287, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 0.30663392074913465, |
|
"grad_norm": 1.7095527648925781, |
|
"learning_rate": 2.7064955431232293e-05, |
|
"loss": 0.0632, |
|
"step": 21350 |
|
}, |
|
{ |
|
"epoch": 0.30735203297571345, |
|
"grad_norm": 0.06429759413003922, |
|
"learning_rate": 2.705773976457685e-05, |
|
"loss": 0.0706, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 0.3080701452022922, |
|
"grad_norm": 58.65373229980469, |
|
"learning_rate": 2.7050524097921407e-05, |
|
"loss": 0.0485, |
|
"step": 21450 |
|
}, |
|
{ |
|
"epoch": 0.308788257428871, |
|
"grad_norm": 0.047401025891304016, |
|
"learning_rate": 2.7043308431265965e-05, |
|
"loss": 0.0839, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.30950636965544975, |
|
"grad_norm": 0.03714323043823242, |
|
"learning_rate": 2.7036092764610524e-05, |
|
"loss": 0.0399, |
|
"step": 21550 |
|
}, |
|
{ |
|
"epoch": 0.3102244818820285, |
|
"grad_norm": 0.06805309653282166, |
|
"learning_rate": 2.702887709795508e-05, |
|
"loss": 0.0665, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 0.3109425941086073, |
|
"grad_norm": 0.5862714052200317, |
|
"learning_rate": 2.7021661431299638e-05, |
|
"loss": 0.0525, |
|
"step": 21650 |
|
}, |
|
{ |
|
"epoch": 0.31166070633518606, |
|
"grad_norm": 0.38594987988471985, |
|
"learning_rate": 2.7014445764644197e-05, |
|
"loss": 0.0772, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 0.31237881856176486, |
|
"grad_norm": 0.0842975378036499, |
|
"learning_rate": 2.7007230097988752e-05, |
|
"loss": 0.063, |
|
"step": 21750 |
|
}, |
|
{ |
|
"epoch": 0.3130969307883436, |
|
"grad_norm": 0.1474410593509674, |
|
"learning_rate": 2.7000014431333314e-05, |
|
"loss": 0.0842, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 0.31381504301492236, |
|
"grad_norm": 0.0783839225769043, |
|
"learning_rate": 2.699279876467787e-05, |
|
"loss": 0.0551, |
|
"step": 21850 |
|
}, |
|
{ |
|
"epoch": 0.31453315524150116, |
|
"grad_norm": 3.7803609371185303, |
|
"learning_rate": 2.6985583098022425e-05, |
|
"loss": 0.0933, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 0.3152512674680799, |
|
"grad_norm": 0.12234686315059662, |
|
"learning_rate": 2.6978367431366987e-05, |
|
"loss": 0.0787, |
|
"step": 21950 |
|
}, |
|
{ |
|
"epoch": 0.31596937969465866, |
|
"grad_norm": 8.717498779296875, |
|
"learning_rate": 2.6971151764711543e-05, |
|
"loss": 0.0468, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.31668749192123746, |
|
"grad_norm": 87.91850280761719, |
|
"learning_rate": 2.6963936098056098e-05, |
|
"loss": 0.0749, |
|
"step": 22050 |
|
}, |
|
{ |
|
"epoch": 0.3174056041478162, |
|
"grad_norm": 0.047568611800670624, |
|
"learning_rate": 2.695672043140066e-05, |
|
"loss": 0.033, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 0.318123716374395, |
|
"grad_norm": 0.07533901929855347, |
|
"learning_rate": 2.6949504764745215e-05, |
|
"loss": 0.044, |
|
"step": 22150 |
|
}, |
|
{ |
|
"epoch": 0.31884182860097376, |
|
"grad_norm": 0.05188257247209549, |
|
"learning_rate": 2.694228909808977e-05, |
|
"loss": 0.0358, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 0.3195599408275525, |
|
"grad_norm": 21.622936248779297, |
|
"learning_rate": 2.6935073431434333e-05, |
|
"loss": 0.06, |
|
"step": 22250 |
|
}, |
|
{ |
|
"epoch": 0.3202780530541313, |
|
"grad_norm": 0.05965098738670349, |
|
"learning_rate": 2.6927857764778888e-05, |
|
"loss": 0.0772, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 0.32099616528071007, |
|
"grad_norm": 0.09030428528785706, |
|
"learning_rate": 2.6920642098123443e-05, |
|
"loss": 0.044, |
|
"step": 22350 |
|
}, |
|
{ |
|
"epoch": 0.3217142775072888, |
|
"grad_norm": 6.469705104827881, |
|
"learning_rate": 2.6913426431468006e-05, |
|
"loss": 0.0627, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 0.3224323897338676, |
|
"grad_norm": 0.048977576196193695, |
|
"learning_rate": 2.690621076481256e-05, |
|
"loss": 0.0402, |
|
"step": 22450 |
|
}, |
|
{ |
|
"epoch": 0.32315050196044637, |
|
"grad_norm": 7.34023904800415, |
|
"learning_rate": 2.689899509815712e-05, |
|
"loss": 0.0661, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.32386861418702517, |
|
"grad_norm": 3.076848268508911, |
|
"learning_rate": 2.689177943150168e-05, |
|
"loss": 0.0794, |
|
"step": 22550 |
|
}, |
|
{ |
|
"epoch": 0.3245867264136039, |
|
"grad_norm": 0.1343400776386261, |
|
"learning_rate": 2.6884563764846234e-05, |
|
"loss": 0.0601, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 0.32530483864018267, |
|
"grad_norm": 0.022573702037334442, |
|
"learning_rate": 2.6877348098190792e-05, |
|
"loss": 0.0309, |
|
"step": 22650 |
|
}, |
|
{ |
|
"epoch": 0.3260229508667615, |
|
"grad_norm": 0.022028079256415367, |
|
"learning_rate": 2.687013243153535e-05, |
|
"loss": 0.0586, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 0.3267410630933402, |
|
"grad_norm": 0.03373101353645325, |
|
"learning_rate": 2.6862916764879906e-05, |
|
"loss": 0.048, |
|
"step": 22750 |
|
}, |
|
{ |
|
"epoch": 0.32745917531991897, |
|
"grad_norm": 7.364265441894531, |
|
"learning_rate": 2.6855701098224465e-05, |
|
"loss": 0.0303, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 0.3281772875464978, |
|
"grad_norm": 7.823999881744385, |
|
"learning_rate": 2.6848485431569024e-05, |
|
"loss": 0.0647, |
|
"step": 22850 |
|
}, |
|
{ |
|
"epoch": 0.3288953997730765, |
|
"grad_norm": 0.02031027339398861, |
|
"learning_rate": 2.684126976491358e-05, |
|
"loss": 0.0472, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 0.32961351199965533, |
|
"grad_norm": 27.554948806762695, |
|
"learning_rate": 2.6834054098258138e-05, |
|
"loss": 0.0451, |
|
"step": 22950 |
|
}, |
|
{ |
|
"epoch": 0.3303316242262341, |
|
"grad_norm": 1.5407648086547852, |
|
"learning_rate": 2.6826838431602697e-05, |
|
"loss": 0.0827, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.3310497364528128, |
|
"grad_norm": 0.15170975029468536, |
|
"learning_rate": 2.6819622764947252e-05, |
|
"loss": 0.0629, |
|
"step": 23050 |
|
}, |
|
{ |
|
"epoch": 0.33176784867939163, |
|
"grad_norm": 9.083374977111816, |
|
"learning_rate": 2.681240709829181e-05, |
|
"loss": 0.0818, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 0.3324859609059704, |
|
"grad_norm": 0.021673114970326424, |
|
"learning_rate": 2.680519143163637e-05, |
|
"loss": 0.0471, |
|
"step": 23150 |
|
}, |
|
{ |
|
"epoch": 0.33320407313254913, |
|
"grad_norm": 0.01671200804412365, |
|
"learning_rate": 2.6797975764980928e-05, |
|
"loss": 0.0222, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 0.33392218535912793, |
|
"grad_norm": 0.01688038930296898, |
|
"learning_rate": 2.6790760098325487e-05, |
|
"loss": 0.0343, |
|
"step": 23250 |
|
}, |
|
{ |
|
"epoch": 0.3346402975857067, |
|
"grad_norm": 0.025257810950279236, |
|
"learning_rate": 2.6783544431670042e-05, |
|
"loss": 0.0242, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 0.3353584098122855, |
|
"grad_norm": 0.016638273373246193, |
|
"learning_rate": 2.67763287650146e-05, |
|
"loss": 0.0408, |
|
"step": 23350 |
|
}, |
|
{ |
|
"epoch": 0.33607652203886423, |
|
"grad_norm": 1.6154401302337646, |
|
"learning_rate": 2.676911309835916e-05, |
|
"loss": 0.0208, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 0.336794634265443, |
|
"grad_norm": 1.148065209388733, |
|
"learning_rate": 2.6761897431703715e-05, |
|
"loss": 0.0848, |
|
"step": 23450 |
|
}, |
|
{ |
|
"epoch": 0.3375127464920218, |
|
"grad_norm": 3.4515271186828613, |
|
"learning_rate": 2.6754681765048274e-05, |
|
"loss": 0.0402, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.33823085871860054, |
|
"grad_norm": 0.04399702697992325, |
|
"learning_rate": 2.6747466098392832e-05, |
|
"loss": 0.0293, |
|
"step": 23550 |
|
}, |
|
{ |
|
"epoch": 0.33894897094517934, |
|
"grad_norm": 0.023840222507715225, |
|
"learning_rate": 2.6740250431737388e-05, |
|
"loss": 0.0567, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 0.3396670831717581, |
|
"grad_norm": 0.18096154928207397, |
|
"learning_rate": 2.6733034765081947e-05, |
|
"loss": 0.0792, |
|
"step": 23650 |
|
}, |
|
{ |
|
"epoch": 0.34038519539833684, |
|
"grad_norm": 2.950413227081299, |
|
"learning_rate": 2.6725819098426505e-05, |
|
"loss": 0.0568, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 0.34110330762491564, |
|
"grad_norm": 0.03382248431444168, |
|
"learning_rate": 2.671860343177106e-05, |
|
"loss": 0.0389, |
|
"step": 23750 |
|
}, |
|
{ |
|
"epoch": 0.3418214198514944, |
|
"grad_norm": 0.06225144863128662, |
|
"learning_rate": 2.671138776511562e-05, |
|
"loss": 0.027, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 0.34253953207807314, |
|
"grad_norm": 0.11203402280807495, |
|
"learning_rate": 2.6704172098460178e-05, |
|
"loss": 0.0481, |
|
"step": 23850 |
|
}, |
|
{ |
|
"epoch": 0.34325764430465194, |
|
"grad_norm": 0.028412073850631714, |
|
"learning_rate": 2.6696956431804737e-05, |
|
"loss": 0.0472, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 0.3439757565312307, |
|
"grad_norm": 0.043254122138023376, |
|
"learning_rate": 2.6689740765149292e-05, |
|
"loss": 0.0524, |
|
"step": 23950 |
|
}, |
|
{ |
|
"epoch": 0.3446938687578095, |
|
"grad_norm": 0.1154152899980545, |
|
"learning_rate": 2.668252509849385e-05, |
|
"loss": 0.0625, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.34541198098438824, |
|
"grad_norm": 0.0551941879093647, |
|
"learning_rate": 2.667530943183841e-05, |
|
"loss": 0.065, |
|
"step": 24050 |
|
}, |
|
{ |
|
"epoch": 0.346130093210967, |
|
"grad_norm": 5.94352388381958, |
|
"learning_rate": 2.6668093765182965e-05, |
|
"loss": 0.0645, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 0.3468482054375458, |
|
"grad_norm": 0.05754236876964569, |
|
"learning_rate": 2.6660878098527524e-05, |
|
"loss": 0.0396, |
|
"step": 24150 |
|
}, |
|
{ |
|
"epoch": 0.34756631766412455, |
|
"grad_norm": 0.032528094947338104, |
|
"learning_rate": 2.6653662431872082e-05, |
|
"loss": 0.0379, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 0.3482844298907033, |
|
"grad_norm": 1.8535875082015991, |
|
"learning_rate": 2.6646446765216638e-05, |
|
"loss": 0.0479, |
|
"step": 24250 |
|
}, |
|
{ |
|
"epoch": 0.3490025421172821, |
|
"grad_norm": 0.05355213209986687, |
|
"learning_rate": 2.6639231098561196e-05, |
|
"loss": 0.0337, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 0.34972065434386085, |
|
"grad_norm": 0.12058199197053909, |
|
"learning_rate": 2.6632015431905755e-05, |
|
"loss": 0.0764, |
|
"step": 24350 |
|
}, |
|
{ |
|
"epoch": 0.35043876657043965, |
|
"grad_norm": 20.0172176361084, |
|
"learning_rate": 2.662479976525031e-05, |
|
"loss": 0.0692, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 0.3511568787970184, |
|
"grad_norm": 1.6164151430130005, |
|
"learning_rate": 2.6617584098594873e-05, |
|
"loss": 0.0617, |
|
"step": 24450 |
|
}, |
|
{ |
|
"epoch": 0.35187499102359715, |
|
"grad_norm": 0.1497562974691391, |
|
"learning_rate": 2.6610368431939428e-05, |
|
"loss": 0.0615, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.35259310325017595, |
|
"grad_norm": 0.18678772449493408, |
|
"learning_rate": 2.6603152765283983e-05, |
|
"loss": 0.051, |
|
"step": 24550 |
|
}, |
|
{ |
|
"epoch": 0.3533112154767547, |
|
"grad_norm": 0.29123035073280334, |
|
"learning_rate": 2.6595937098628545e-05, |
|
"loss": 0.0606, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 0.35402932770333345, |
|
"grad_norm": 3.2221381664276123, |
|
"learning_rate": 2.65887214319731e-05, |
|
"loss": 0.0341, |
|
"step": 24650 |
|
}, |
|
{ |
|
"epoch": 0.35474743992991226, |
|
"grad_norm": 0.10685055702924728, |
|
"learning_rate": 2.6581505765317656e-05, |
|
"loss": 0.0843, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 0.355465552156491, |
|
"grad_norm": 0.10422638803720474, |
|
"learning_rate": 2.6574290098662218e-05, |
|
"loss": 0.0547, |
|
"step": 24750 |
|
}, |
|
{ |
|
"epoch": 0.3561836643830698, |
|
"grad_norm": 0.08146257698535919, |
|
"learning_rate": 2.6567074432006773e-05, |
|
"loss": 0.0439, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 0.35690177660964856, |
|
"grad_norm": 0.15033039450645447, |
|
"learning_rate": 2.655985876535133e-05, |
|
"loss": 0.0713, |
|
"step": 24850 |
|
}, |
|
{ |
|
"epoch": 0.3576198888362273, |
|
"grad_norm": 0.3770735561847687, |
|
"learning_rate": 2.655264309869589e-05, |
|
"loss": 0.0199, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 0.3583380010628061, |
|
"grad_norm": 0.5093479156494141, |
|
"learning_rate": 2.6545427432040446e-05, |
|
"loss": 0.0632, |
|
"step": 24950 |
|
}, |
|
{ |
|
"epoch": 0.35905611328938486, |
|
"grad_norm": 0.03541666641831398, |
|
"learning_rate": 2.6538211765385e-05, |
|
"loss": 0.0494, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.3597742255159636, |
|
"grad_norm": 0.10205601900815964, |
|
"learning_rate": 2.6530996098729564e-05, |
|
"loss": 0.1011, |
|
"step": 25050 |
|
}, |
|
{ |
|
"epoch": 0.3604923377425424, |
|
"grad_norm": 11.667498588562012, |
|
"learning_rate": 2.652378043207412e-05, |
|
"loss": 0.0367, |
|
"step": 25100 |
|
}, |
|
{ |
|
"epoch": 0.36121044996912116, |
|
"grad_norm": 0.04607768356800079, |
|
"learning_rate": 2.6516564765418678e-05, |
|
"loss": 0.0456, |
|
"step": 25150 |
|
}, |
|
{ |
|
"epoch": 0.36192856219569997, |
|
"grad_norm": 0.3675365746021271, |
|
"learning_rate": 2.6509493412096344e-05, |
|
"loss": 0.0653, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 0.3626466744222787, |
|
"grad_norm": 0.06698648631572723, |
|
"learning_rate": 2.6502277745440902e-05, |
|
"loss": 0.053, |
|
"step": 25250 |
|
}, |
|
{ |
|
"epoch": 0.36336478664885746, |
|
"grad_norm": 2.359265089035034, |
|
"learning_rate": 2.6495062078785458e-05, |
|
"loss": 0.0399, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 0.36408289887543627, |
|
"grad_norm": 1.7382354736328125, |
|
"learning_rate": 2.648784641213002e-05, |
|
"loss": 0.0438, |
|
"step": 25350 |
|
}, |
|
{ |
|
"epoch": 0.364801011102015, |
|
"grad_norm": 10.03585433959961, |
|
"learning_rate": 2.6480630745474575e-05, |
|
"loss": 0.0469, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 0.3655191233285938, |
|
"grad_norm": 0.02849421463906765, |
|
"learning_rate": 2.647341507881913e-05, |
|
"loss": 0.039, |
|
"step": 25450 |
|
}, |
|
{ |
|
"epoch": 0.36623723555517257, |
|
"grad_norm": 0.04304594174027443, |
|
"learning_rate": 2.6466199412163692e-05, |
|
"loss": 0.0481, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.3669553477817513, |
|
"grad_norm": 0.12656258046627045, |
|
"learning_rate": 2.6458983745508248e-05, |
|
"loss": 0.0584, |
|
"step": 25550 |
|
}, |
|
{ |
|
"epoch": 0.3676734600083301, |
|
"grad_norm": 0.01808289811015129, |
|
"learning_rate": 2.6451768078852807e-05, |
|
"loss": 0.0405, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 0.36839157223490887, |
|
"grad_norm": 3.8107683658599854, |
|
"learning_rate": 2.6444552412197365e-05, |
|
"loss": 0.0542, |
|
"step": 25650 |
|
}, |
|
{ |
|
"epoch": 0.3691096844614876, |
|
"grad_norm": 0.02769179455935955, |
|
"learning_rate": 2.643733674554192e-05, |
|
"loss": 0.0452, |
|
"step": 25700 |
|
}, |
|
{ |
|
"epoch": 0.3698277966880664, |
|
"grad_norm": 0.10629933327436447, |
|
"learning_rate": 2.643012107888648e-05, |
|
"loss": 0.0545, |
|
"step": 25750 |
|
}, |
|
{ |
|
"epoch": 0.3705459089146452, |
|
"grad_norm": 0.025269586592912674, |
|
"learning_rate": 2.6422905412231038e-05, |
|
"loss": 0.0353, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 0.371264021141224, |
|
"grad_norm": 0.04765285179018974, |
|
"learning_rate": 2.6415689745575593e-05, |
|
"loss": 0.042, |
|
"step": 25850 |
|
}, |
|
{ |
|
"epoch": 0.3719821333678027, |
|
"grad_norm": 0.020955292508006096, |
|
"learning_rate": 2.6408474078920152e-05, |
|
"loss": 0.0403, |
|
"step": 25900 |
|
}, |
|
{ |
|
"epoch": 0.3727002455943815, |
|
"grad_norm": 0.03894009068608284, |
|
"learning_rate": 2.640125841226471e-05, |
|
"loss": 0.0184, |
|
"step": 25950 |
|
}, |
|
{ |
|
"epoch": 0.3734183578209603, |
|
"grad_norm": 0.01537378504872322, |
|
"learning_rate": 2.6394042745609266e-05, |
|
"loss": 0.0491, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.374136470047539, |
|
"grad_norm": 0.17179059982299805, |
|
"learning_rate": 2.6386827078953828e-05, |
|
"loss": 0.032, |
|
"step": 26050 |
|
}, |
|
{ |
|
"epoch": 0.3748545822741178, |
|
"grad_norm": 0.03283374384045601, |
|
"learning_rate": 2.6379611412298384e-05, |
|
"loss": 0.0686, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 0.3755726945006966, |
|
"grad_norm": 0.015107177197933197, |
|
"learning_rate": 2.637239574564294e-05, |
|
"loss": 0.0412, |
|
"step": 26150 |
|
}, |
|
{ |
|
"epoch": 0.37629080672727533, |
|
"grad_norm": 0.9494103193283081, |
|
"learning_rate": 2.63651800789875e-05, |
|
"loss": 0.1147, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 0.37700891895385413, |
|
"grad_norm": 0.035837531089782715, |
|
"learning_rate": 2.6357964412332056e-05, |
|
"loss": 0.0311, |
|
"step": 26250 |
|
}, |
|
{ |
|
"epoch": 0.3777270311804329, |
|
"grad_norm": 0.05149250105023384, |
|
"learning_rate": 2.6350893059009722e-05, |
|
"loss": 0.0504, |
|
"step": 26300 |
|
}, |
|
{ |
|
"epoch": 0.37844514340701163, |
|
"grad_norm": 0.05706680193543434, |
|
"learning_rate": 2.634367739235428e-05, |
|
"loss": 0.0697, |
|
"step": 26350 |
|
}, |
|
{ |
|
"epoch": 0.37916325563359043, |
|
"grad_norm": 0.5677555203437805, |
|
"learning_rate": 2.633646172569884e-05, |
|
"loss": 0.0699, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 0.3798813678601692, |
|
"grad_norm": 2.2311105728149414, |
|
"learning_rate": 2.6329246059043395e-05, |
|
"loss": 0.0491, |
|
"step": 26450 |
|
}, |
|
{ |
|
"epoch": 0.38059948008674793, |
|
"grad_norm": 0.05452633649110794, |
|
"learning_rate": 2.6322030392387954e-05, |
|
"loss": 0.052, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.38131759231332674, |
|
"grad_norm": 0.10342645645141602, |
|
"learning_rate": 2.6314814725732512e-05, |
|
"loss": 0.0431, |
|
"step": 26550 |
|
}, |
|
{ |
|
"epoch": 0.3820357045399055, |
|
"grad_norm": 7.60914421081543, |
|
"learning_rate": 2.6307599059077068e-05, |
|
"loss": 0.0637, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 0.3827538167664843, |
|
"grad_norm": 5.700191497802734, |
|
"learning_rate": 2.6300383392421626e-05, |
|
"loss": 0.0637, |
|
"step": 26650 |
|
}, |
|
{ |
|
"epoch": 0.38347192899306304, |
|
"grad_norm": 2.0660006999969482, |
|
"learning_rate": 2.6293167725766185e-05, |
|
"loss": 0.0728, |
|
"step": 26700 |
|
}, |
|
{ |
|
"epoch": 0.3841900412196418, |
|
"grad_norm": 0.04174574464559555, |
|
"learning_rate": 2.628595205911074e-05, |
|
"loss": 0.0446, |
|
"step": 26750 |
|
}, |
|
{ |
|
"epoch": 0.3849081534462206, |
|
"grad_norm": 3.7447893619537354, |
|
"learning_rate": 2.62787363924553e-05, |
|
"loss": 0.0301, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 0.38562626567279934, |
|
"grad_norm": 2.0020968914031982, |
|
"learning_rate": 2.6271520725799858e-05, |
|
"loss": 0.0679, |
|
"step": 26850 |
|
}, |
|
{ |
|
"epoch": 0.3863443778993781, |
|
"grad_norm": 0.04318321496248245, |
|
"learning_rate": 2.6264305059144413e-05, |
|
"loss": 0.0533, |
|
"step": 26900 |
|
}, |
|
{ |
|
"epoch": 0.3870624901259569, |
|
"grad_norm": 0.05878012999892235, |
|
"learning_rate": 2.6257089392488975e-05, |
|
"loss": 0.0537, |
|
"step": 26950 |
|
}, |
|
{ |
|
"epoch": 0.38778060235253564, |
|
"grad_norm": 28.61198616027832, |
|
"learning_rate": 2.624987372583353e-05, |
|
"loss": 0.0554, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.38849871457911445, |
|
"grad_norm": 0.12207271158695221, |
|
"learning_rate": 2.6242658059178086e-05, |
|
"loss": 0.0566, |
|
"step": 27050 |
|
}, |
|
{ |
|
"epoch": 0.3892168268056932, |
|
"grad_norm": 2.0619852542877197, |
|
"learning_rate": 2.6235442392522648e-05, |
|
"loss": 0.0577, |
|
"step": 27100 |
|
}, |
|
{ |
|
"epoch": 0.38993493903227194, |
|
"grad_norm": 0.03043234907090664, |
|
"learning_rate": 2.6228226725867203e-05, |
|
"loss": 0.041, |
|
"step": 27150 |
|
}, |
|
{ |
|
"epoch": 0.39065305125885075, |
|
"grad_norm": 0.07814677804708481, |
|
"learning_rate": 2.622101105921176e-05, |
|
"loss": 0.0335, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 0.3913711634854295, |
|
"grad_norm": 0.3286628723144531, |
|
"learning_rate": 2.621379539255632e-05, |
|
"loss": 0.0667, |
|
"step": 27250 |
|
}, |
|
{ |
|
"epoch": 0.39208927571200825, |
|
"grad_norm": 0.0443241186439991, |
|
"learning_rate": 2.6206579725900876e-05, |
|
"loss": 0.0337, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 0.39280738793858705, |
|
"grad_norm": 0.0626874640583992, |
|
"learning_rate": 2.619936405924543e-05, |
|
"loss": 0.0565, |
|
"step": 27350 |
|
}, |
|
{ |
|
"epoch": 0.3935255001651658, |
|
"grad_norm": 4.9472808837890625, |
|
"learning_rate": 2.6192148392589994e-05, |
|
"loss": 0.0407, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 0.3942436123917446, |
|
"grad_norm": 0.1261199414730072, |
|
"learning_rate": 2.618493272593455e-05, |
|
"loss": 0.0544, |
|
"step": 27450 |
|
}, |
|
{ |
|
"epoch": 0.39496172461832335, |
|
"grad_norm": 0.02840198390185833, |
|
"learning_rate": 2.6177717059279104e-05, |
|
"loss": 0.0342, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.3956798368449021, |
|
"grad_norm": 0.04255662113428116, |
|
"learning_rate": 2.6170501392623667e-05, |
|
"loss": 0.0542, |
|
"step": 27550 |
|
}, |
|
{ |
|
"epoch": 0.3963979490714809, |
|
"grad_norm": 0.08006764948368073, |
|
"learning_rate": 2.6163285725968222e-05, |
|
"loss": 0.0821, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 0.39711606129805965, |
|
"grad_norm": 2.240722179412842, |
|
"learning_rate": 2.615607005931278e-05, |
|
"loss": 0.0599, |
|
"step": 27650 |
|
}, |
|
{ |
|
"epoch": 0.39783417352463846, |
|
"grad_norm": 0.050311390310525894, |
|
"learning_rate": 2.614885439265734e-05, |
|
"loss": 0.0281, |
|
"step": 27700 |
|
}, |
|
{ |
|
"epoch": 0.3985522857512172, |
|
"grad_norm": 0.03226127102971077, |
|
"learning_rate": 2.6141638726001895e-05, |
|
"loss": 0.0493, |
|
"step": 27750 |
|
}, |
|
{ |
|
"epoch": 0.39927039797779595, |
|
"grad_norm": 0.2031366527080536, |
|
"learning_rate": 2.6134423059346457e-05, |
|
"loss": 0.0696, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 0.39998851020437476, |
|
"grad_norm": 0.5344455242156982, |
|
"learning_rate": 2.6127207392691012e-05, |
|
"loss": 0.0543, |
|
"step": 27850 |
|
}, |
|
{ |
|
"epoch": 0.4007066224309535, |
|
"grad_norm": 0.02825642190873623, |
|
"learning_rate": 2.6119991726035567e-05, |
|
"loss": 0.0506, |
|
"step": 27900 |
|
}, |
|
{ |
|
"epoch": 0.40142473465753226, |
|
"grad_norm": 0.05845210328698158, |
|
"learning_rate": 2.611277605938013e-05, |
|
"loss": 0.0466, |
|
"step": 27950 |
|
}, |
|
{ |
|
"epoch": 0.40214284688411106, |
|
"grad_norm": 0.07064907997846603, |
|
"learning_rate": 2.6105560392724685e-05, |
|
"loss": 0.0246, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.4028609591106898, |
|
"grad_norm": 71.78559875488281, |
|
"learning_rate": 2.609834472606924e-05, |
|
"loss": 0.0838, |
|
"step": 28050 |
|
}, |
|
{ |
|
"epoch": 0.4035790713372686, |
|
"grad_norm": 0.0453072227537632, |
|
"learning_rate": 2.6091129059413802e-05, |
|
"loss": 0.0378, |
|
"step": 28100 |
|
}, |
|
{ |
|
"epoch": 0.40429718356384736, |
|
"grad_norm": 0.054179996252059937, |
|
"learning_rate": 2.6083913392758358e-05, |
|
"loss": 0.05, |
|
"step": 28150 |
|
}, |
|
{ |
|
"epoch": 0.4050152957904261, |
|
"grad_norm": 0.021717887371778488, |
|
"learning_rate": 2.6076697726102913e-05, |
|
"loss": 0.0223, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 0.4057334080170049, |
|
"grad_norm": 0.022488858550786972, |
|
"learning_rate": 2.6069482059447475e-05, |
|
"loss": 0.0487, |
|
"step": 28250 |
|
}, |
|
{ |
|
"epoch": 0.40645152024358366, |
|
"grad_norm": 0.056543637067079544, |
|
"learning_rate": 2.606226639279203e-05, |
|
"loss": 0.06, |
|
"step": 28300 |
|
}, |
|
{ |
|
"epoch": 0.4071696324701624, |
|
"grad_norm": 14.041226387023926, |
|
"learning_rate": 2.605505072613659e-05, |
|
"loss": 0.038, |
|
"step": 28350 |
|
}, |
|
{ |
|
"epoch": 0.4078877446967412, |
|
"grad_norm": 0.8094896078109741, |
|
"learning_rate": 2.6047835059481148e-05, |
|
"loss": 0.0489, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 0.40860585692331997, |
|
"grad_norm": 0.044881440699100494, |
|
"learning_rate": 2.6040619392825703e-05, |
|
"loss": 0.057, |
|
"step": 28450 |
|
}, |
|
{ |
|
"epoch": 0.40932396914989877, |
|
"grad_norm": 0.0832076370716095, |
|
"learning_rate": 2.6033403726170262e-05, |
|
"loss": 0.0505, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.4100420813764775, |
|
"grad_norm": 15.860810279846191, |
|
"learning_rate": 2.602618805951482e-05, |
|
"loss": 0.0532, |
|
"step": 28550 |
|
}, |
|
{ |
|
"epoch": 0.41076019360305627, |
|
"grad_norm": 0.11455155164003372, |
|
"learning_rate": 2.6018972392859376e-05, |
|
"loss": 0.0585, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 0.41147830582963507, |
|
"grad_norm": 0.07686082273721695, |
|
"learning_rate": 2.6011756726203935e-05, |
|
"loss": 0.0818, |
|
"step": 28650 |
|
}, |
|
{ |
|
"epoch": 0.4121964180562138, |
|
"grad_norm": 63.3516731262207, |
|
"learning_rate": 2.6004541059548493e-05, |
|
"loss": 0.0771, |
|
"step": 28700 |
|
}, |
|
{ |
|
"epoch": 0.41291453028279257, |
|
"grad_norm": 0.06112220883369446, |
|
"learning_rate": 2.599732539289305e-05, |
|
"loss": 0.0917, |
|
"step": 28750 |
|
}, |
|
{ |
|
"epoch": 0.4136326425093714, |
|
"grad_norm": 20.554012298583984, |
|
"learning_rate": 2.5990109726237607e-05, |
|
"loss": 0.0626, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 0.4143507547359501, |
|
"grad_norm": 0.05520211532711983, |
|
"learning_rate": 2.5982894059582166e-05, |
|
"loss": 0.046, |
|
"step": 28850 |
|
}, |
|
{ |
|
"epoch": 0.4150688669625289, |
|
"grad_norm": 0.0589030385017395, |
|
"learning_rate": 2.597567839292672e-05, |
|
"loss": 0.034, |
|
"step": 28900 |
|
}, |
|
{ |
|
"epoch": 0.4157869791891077, |
|
"grad_norm": 0.03793846070766449, |
|
"learning_rate": 2.596846272627128e-05, |
|
"loss": 0.0324, |
|
"step": 28950 |
|
}, |
|
{ |
|
"epoch": 0.4165050914156864, |
|
"grad_norm": 0.02868301048874855, |
|
"learning_rate": 2.596124705961584e-05, |
|
"loss": 0.0415, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.41722320364226523, |
|
"grad_norm": 14.847681045532227, |
|
"learning_rate": 2.5954031392960398e-05, |
|
"loss": 0.0395, |
|
"step": 29050 |
|
}, |
|
{ |
|
"epoch": 0.417941315868844, |
|
"grad_norm": 0.06720839440822601, |
|
"learning_rate": 2.5946815726304953e-05, |
|
"loss": 0.0361, |
|
"step": 29100 |
|
}, |
|
{ |
|
"epoch": 0.4186594280954227, |
|
"grad_norm": 0.03984254598617554, |
|
"learning_rate": 2.5939600059649512e-05, |
|
"loss": 0.0498, |
|
"step": 29150 |
|
}, |
|
{ |
|
"epoch": 0.41937754032200153, |
|
"grad_norm": 1.7163082361221313, |
|
"learning_rate": 2.593238439299407e-05, |
|
"loss": 0.0174, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 0.4200956525485803, |
|
"grad_norm": 0.014802789315581322, |
|
"learning_rate": 2.5925168726338626e-05, |
|
"loss": 0.0374, |
|
"step": 29250 |
|
}, |
|
{ |
|
"epoch": 0.4208137647751591, |
|
"grad_norm": 0.017352402210235596, |
|
"learning_rate": 2.5917953059683185e-05, |
|
"loss": 0.0211, |
|
"step": 29300 |
|
}, |
|
{ |
|
"epoch": 0.42153187700173783, |
|
"grad_norm": 0.07344254106283188, |
|
"learning_rate": 2.5910737393027743e-05, |
|
"loss": 0.0502, |
|
"step": 29350 |
|
}, |
|
{ |
|
"epoch": 0.4222499892283166, |
|
"grad_norm": 0.03359295800328255, |
|
"learning_rate": 2.59035217263723e-05, |
|
"loss": 0.0258, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 0.4229681014548954, |
|
"grad_norm": 0.45311981439590454, |
|
"learning_rate": 2.5896306059716857e-05, |
|
"loss": 0.0318, |
|
"step": 29450 |
|
}, |
|
{ |
|
"epoch": 0.42368621368147413, |
|
"grad_norm": 0.3234398663043976, |
|
"learning_rate": 2.5889090393061416e-05, |
|
"loss": 0.0546, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.42440432590805294, |
|
"grad_norm": 0.024956252425909042, |
|
"learning_rate": 2.588187472640597e-05, |
|
"loss": 0.0266, |
|
"step": 29550 |
|
}, |
|
{ |
|
"epoch": 0.4251224381346317, |
|
"grad_norm": 0.10940051078796387, |
|
"learning_rate": 2.587465905975053e-05, |
|
"loss": 0.0897, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 0.42584055036121043, |
|
"grad_norm": 0.07653014361858368, |
|
"learning_rate": 2.586744339309509e-05, |
|
"loss": 0.0327, |
|
"step": 29650 |
|
}, |
|
{ |
|
"epoch": 0.42655866258778924, |
|
"grad_norm": 0.1112053170800209, |
|
"learning_rate": 2.5860227726439644e-05, |
|
"loss": 0.0514, |
|
"step": 29700 |
|
}, |
|
{ |
|
"epoch": 0.427276774814368, |
|
"grad_norm": 2.1015751361846924, |
|
"learning_rate": 2.5853012059784206e-05, |
|
"loss": 0.119, |
|
"step": 29750 |
|
}, |
|
{ |
|
"epoch": 0.42799488704094674, |
|
"grad_norm": 0.05717989802360535, |
|
"learning_rate": 2.584579639312876e-05, |
|
"loss": 0.0536, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 0.42871299926752554, |
|
"grad_norm": 32.660850524902344, |
|
"learning_rate": 2.583858072647332e-05, |
|
"loss": 0.0992, |
|
"step": 29850 |
|
}, |
|
{ |
|
"epoch": 0.4294311114941043, |
|
"grad_norm": 0.06266611069440842, |
|
"learning_rate": 2.583136505981788e-05, |
|
"loss": 0.0568, |
|
"step": 29900 |
|
}, |
|
{ |
|
"epoch": 0.4301492237206831, |
|
"grad_norm": 0.0731596052646637, |
|
"learning_rate": 2.5824149393162434e-05, |
|
"loss": 0.0534, |
|
"step": 29950 |
|
}, |
|
{ |
|
"epoch": 0.43086733594726184, |
|
"grad_norm": 0.0834427997469902, |
|
"learning_rate": 2.5816933726506993e-05, |
|
"loss": 0.0688, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.4315854481738406, |
|
"grad_norm": 0.917650580406189, |
|
"learning_rate": 2.5809718059851552e-05, |
|
"loss": 0.0559, |
|
"step": 30050 |
|
}, |
|
{ |
|
"epoch": 0.4323035604004194, |
|
"grad_norm": 0.11482948809862137, |
|
"learning_rate": 2.5802646706529218e-05, |
|
"loss": 0.0669, |
|
"step": 30100 |
|
}, |
|
{ |
|
"epoch": 0.43302167262699814, |
|
"grad_norm": 0.0454438179731369, |
|
"learning_rate": 2.5795431039873776e-05, |
|
"loss": 0.0308, |
|
"step": 30150 |
|
}, |
|
{ |
|
"epoch": 0.4337397848535769, |
|
"grad_norm": 0.20670810341835022, |
|
"learning_rate": 2.578821537321833e-05, |
|
"loss": 0.0416, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 0.4344578970801557, |
|
"grad_norm": 0.03354871645569801, |
|
"learning_rate": 2.578099970656289e-05, |
|
"loss": 0.0222, |
|
"step": 30250 |
|
}, |
|
{ |
|
"epoch": 0.43517600930673445, |
|
"grad_norm": 5.863499164581299, |
|
"learning_rate": 2.577378403990745e-05, |
|
"loss": 0.0687, |
|
"step": 30300 |
|
}, |
|
{ |
|
"epoch": 0.43589412153331325, |
|
"grad_norm": 0.5545240044593811, |
|
"learning_rate": 2.5766568373252004e-05, |
|
"loss": 0.0578, |
|
"step": 30350 |
|
}, |
|
{ |
|
"epoch": 0.436612233759892, |
|
"grad_norm": 0.10319915413856506, |
|
"learning_rate": 2.5759352706596563e-05, |
|
"loss": 0.0471, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 0.43733034598647075, |
|
"grad_norm": 0.030765533447265625, |
|
"learning_rate": 2.5752137039941122e-05, |
|
"loss": 0.0519, |
|
"step": 30450 |
|
}, |
|
{ |
|
"epoch": 0.43804845821304955, |
|
"grad_norm": 477.0625305175781, |
|
"learning_rate": 2.5745209999951898e-05, |
|
"loss": 0.0669, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.4387665704396283, |
|
"grad_norm": 228.48228454589844, |
|
"learning_rate": 2.5737994333296453e-05, |
|
"loss": 0.0614, |
|
"step": 30550 |
|
}, |
|
{ |
|
"epoch": 0.43948468266620705, |
|
"grad_norm": 1.823301076889038, |
|
"learning_rate": 2.5730778666641012e-05, |
|
"loss": 0.052, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 0.44020279489278585, |
|
"grad_norm": 0.8825114369392395, |
|
"learning_rate": 2.572356299998557e-05, |
|
"loss": 0.0514, |
|
"step": 30650 |
|
}, |
|
{ |
|
"epoch": 0.4409209071193646, |
|
"grad_norm": 0.08648502081632614, |
|
"learning_rate": 2.5716347333330126e-05, |
|
"loss": 0.0344, |
|
"step": 30700 |
|
}, |
|
{ |
|
"epoch": 0.4416390193459434, |
|
"grad_norm": 0.038441527634859085, |
|
"learning_rate": 2.5709275980007792e-05, |
|
"loss": 0.055, |
|
"step": 30750 |
|
}, |
|
{ |
|
"epoch": 0.44235713157252216, |
|
"grad_norm": 0.09304679930210114, |
|
"learning_rate": 2.570206031335235e-05, |
|
"loss": 0.0497, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 0.4430752437991009, |
|
"grad_norm": 1.5768539905548096, |
|
"learning_rate": 2.569484464669691e-05, |
|
"loss": 0.0578, |
|
"step": 30850 |
|
}, |
|
{ |
|
"epoch": 0.4437933560256797, |
|
"grad_norm": 0.02988545596599579, |
|
"learning_rate": 2.5687628980041468e-05, |
|
"loss": 0.0325, |
|
"step": 30900 |
|
}, |
|
{ |
|
"epoch": 0.44451146825225846, |
|
"grad_norm": 0.10128447413444519, |
|
"learning_rate": 2.5680413313386024e-05, |
|
"loss": 0.03, |
|
"step": 30950 |
|
}, |
|
{ |
|
"epoch": 0.4452295804788372, |
|
"grad_norm": 0.06121072173118591, |
|
"learning_rate": 2.5673197646730582e-05, |
|
"loss": 0.0644, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.445947692705416, |
|
"grad_norm": 0.20161040127277374, |
|
"learning_rate": 2.566598198007514e-05, |
|
"loss": 0.0763, |
|
"step": 31050 |
|
}, |
|
{ |
|
"epoch": 0.44666580493199476, |
|
"grad_norm": 0.619103729724884, |
|
"learning_rate": 2.5658766313419696e-05, |
|
"loss": 0.0457, |
|
"step": 31100 |
|
}, |
|
{ |
|
"epoch": 0.44738391715857356, |
|
"grad_norm": 1.6349852085113525, |
|
"learning_rate": 2.5651550646764255e-05, |
|
"loss": 0.052, |
|
"step": 31150 |
|
}, |
|
{ |
|
"epoch": 0.4481020293851523, |
|
"grad_norm": 0.12041996419429779, |
|
"learning_rate": 2.5644334980108814e-05, |
|
"loss": 0.0495, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 0.44882014161173106, |
|
"grad_norm": 3.013185977935791, |
|
"learning_rate": 2.5637119313453372e-05, |
|
"loss": 0.0597, |
|
"step": 31250 |
|
}, |
|
{ |
|
"epoch": 0.44953825383830986, |
|
"grad_norm": 0.07766876369714737, |
|
"learning_rate": 2.5629903646797928e-05, |
|
"loss": 0.0674, |
|
"step": 31300 |
|
}, |
|
{ |
|
"epoch": 0.4502563660648886, |
|
"grad_norm": 0.4663377106189728, |
|
"learning_rate": 2.5622687980142487e-05, |
|
"loss": 0.0701, |
|
"step": 31350 |
|
}, |
|
{ |
|
"epoch": 0.4509744782914674, |
|
"grad_norm": 0.045421402901411057, |
|
"learning_rate": 2.5615472313487045e-05, |
|
"loss": 0.0302, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 0.45169259051804617, |
|
"grad_norm": 0.19363941252231598, |
|
"learning_rate": 2.56082566468316e-05, |
|
"loss": 0.074, |
|
"step": 31450 |
|
}, |
|
{ |
|
"epoch": 0.4524107027446249, |
|
"grad_norm": 3.0277023315429688, |
|
"learning_rate": 2.560104098017616e-05, |
|
"loss": 0.0802, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.4531288149712037, |
|
"grad_norm": 87.38606262207031, |
|
"learning_rate": 2.5593825313520718e-05, |
|
"loss": 0.0384, |
|
"step": 31550 |
|
}, |
|
{ |
|
"epoch": 0.45384692719778247, |
|
"grad_norm": 0.049042146652936935, |
|
"learning_rate": 2.5586609646865273e-05, |
|
"loss": 0.0169, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 0.4545650394243612, |
|
"grad_norm": 0.051833972334861755, |
|
"learning_rate": 2.5579393980209832e-05, |
|
"loss": 0.0518, |
|
"step": 31650 |
|
}, |
|
{ |
|
"epoch": 0.45528315165094, |
|
"grad_norm": 0.03294980525970459, |
|
"learning_rate": 2.557217831355439e-05, |
|
"loss": 0.0493, |
|
"step": 31700 |
|
}, |
|
{ |
|
"epoch": 0.45600126387751877, |
|
"grad_norm": 19.67607307434082, |
|
"learning_rate": 2.5564962646898946e-05, |
|
"loss": 0.0251, |
|
"step": 31750 |
|
}, |
|
{ |
|
"epoch": 0.4567193761040976, |
|
"grad_norm": 0.062403663992881775, |
|
"learning_rate": 2.5557746980243505e-05, |
|
"loss": 0.0705, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 0.4574374883306763, |
|
"grad_norm": 2.5241332054138184, |
|
"learning_rate": 2.5550531313588064e-05, |
|
"loss": 0.0559, |
|
"step": 31850 |
|
}, |
|
{ |
|
"epoch": 0.45815560055725507, |
|
"grad_norm": 0.09829045087099075, |
|
"learning_rate": 2.554331564693262e-05, |
|
"loss": 0.0732, |
|
"step": 31900 |
|
}, |
|
{ |
|
"epoch": 0.4588737127838339, |
|
"grad_norm": 0.07366979122161865, |
|
"learning_rate": 2.553609998027718e-05, |
|
"loss": 0.0412, |
|
"step": 31950 |
|
}, |
|
{ |
|
"epoch": 0.4595918250104126, |
|
"grad_norm": 0.4203979969024658, |
|
"learning_rate": 2.5528884313621736e-05, |
|
"loss": 0.0524, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.4603099372369914, |
|
"grad_norm": 0.07752757519483566, |
|
"learning_rate": 2.5521668646966292e-05, |
|
"loss": 0.0611, |
|
"step": 32050 |
|
}, |
|
{ |
|
"epoch": 0.4610280494635702, |
|
"grad_norm": 0.09692207723855972, |
|
"learning_rate": 2.5514452980310854e-05, |
|
"loss": 0.0708, |
|
"step": 32100 |
|
}, |
|
{ |
|
"epoch": 0.4617461616901489, |
|
"grad_norm": 0.07400350272655487, |
|
"learning_rate": 2.550723731365541e-05, |
|
"loss": 0.0525, |
|
"step": 32150 |
|
}, |
|
{ |
|
"epoch": 0.46246427391672773, |
|
"grad_norm": 0.05159100517630577, |
|
"learning_rate": 2.5500021646999965e-05, |
|
"loss": 0.0549, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 0.4631823861433065, |
|
"grad_norm": 35.71484375, |
|
"learning_rate": 2.5492805980344527e-05, |
|
"loss": 0.0529, |
|
"step": 32250 |
|
}, |
|
{ |
|
"epoch": 0.46390049836988523, |
|
"grad_norm": 0.07319821417331696, |
|
"learning_rate": 2.5485590313689082e-05, |
|
"loss": 0.0655, |
|
"step": 32300 |
|
}, |
|
{ |
|
"epoch": 0.46461861059646403, |
|
"grad_norm": 2.4547245502471924, |
|
"learning_rate": 2.5478374647033637e-05, |
|
"loss": 0.0454, |
|
"step": 32350 |
|
}, |
|
{ |
|
"epoch": 0.4653367228230428, |
|
"grad_norm": 0.10266736894845963, |
|
"learning_rate": 2.54711589803782e-05, |
|
"loss": 0.0313, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 0.46605483504962153, |
|
"grad_norm": 5.870818138122559, |
|
"learning_rate": 2.5463943313722755e-05, |
|
"loss": 0.0643, |
|
"step": 32450 |
|
}, |
|
{ |
|
"epoch": 0.46677294727620033, |
|
"grad_norm": 0.31677067279815674, |
|
"learning_rate": 2.545672764706731e-05, |
|
"loss": 0.0628, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.4674910595027791, |
|
"grad_norm": 0.07288742810487747, |
|
"learning_rate": 2.5449511980411872e-05, |
|
"loss": 0.0863, |
|
"step": 32550 |
|
}, |
|
{ |
|
"epoch": 0.4682091717293579, |
|
"grad_norm": 0.07232692092657089, |
|
"learning_rate": 2.5442296313756428e-05, |
|
"loss": 0.0583, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 0.46892728395593664, |
|
"grad_norm": 1.5541493892669678, |
|
"learning_rate": 2.5435080647100986e-05, |
|
"loss": 0.0652, |
|
"step": 32650 |
|
}, |
|
{ |
|
"epoch": 0.4696453961825154, |
|
"grad_norm": 0.14363867044448853, |
|
"learning_rate": 2.5427864980445545e-05, |
|
"loss": 0.0452, |
|
"step": 32700 |
|
}, |
|
{ |
|
"epoch": 0.4703635084090942, |
|
"grad_norm": 0.07765737920999527, |
|
"learning_rate": 2.54206493137901e-05, |
|
"loss": 0.0915, |
|
"step": 32750 |
|
}, |
|
{ |
|
"epoch": 0.47108162063567294, |
|
"grad_norm": 0.6616801023483276, |
|
"learning_rate": 2.541343364713466e-05, |
|
"loss": 0.0343, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 0.4717997328622517, |
|
"grad_norm": 0.11457184702157974, |
|
"learning_rate": 2.5406217980479218e-05, |
|
"loss": 0.0401, |
|
"step": 32850 |
|
}, |
|
{ |
|
"epoch": 0.4725178450888305, |
|
"grad_norm": 0.03043946996331215, |
|
"learning_rate": 2.5399002313823773e-05, |
|
"loss": 0.0659, |
|
"step": 32900 |
|
}, |
|
{ |
|
"epoch": 0.47323595731540924, |
|
"grad_norm": 0.04275374487042427, |
|
"learning_rate": 2.539193096050144e-05, |
|
"loss": 0.0479, |
|
"step": 32950 |
|
}, |
|
{ |
|
"epoch": 0.47395406954198804, |
|
"grad_norm": 0.036188703030347824, |
|
"learning_rate": 2.5384715293846e-05, |
|
"loss": 0.0397, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.4746721817685668, |
|
"grad_norm": 3.709174156188965, |
|
"learning_rate": 2.5377499627190556e-05, |
|
"loss": 0.0405, |
|
"step": 33050 |
|
}, |
|
{ |
|
"epoch": 0.47539029399514554, |
|
"grad_norm": 0.039049405604600906, |
|
"learning_rate": 2.5370283960535115e-05, |
|
"loss": 0.0399, |
|
"step": 33100 |
|
}, |
|
{ |
|
"epoch": 0.47610840622172435, |
|
"grad_norm": 0.03738459199666977, |
|
"learning_rate": 2.5363068293879674e-05, |
|
"loss": 0.0456, |
|
"step": 33150 |
|
}, |
|
{ |
|
"epoch": 0.4768265184483031, |
|
"grad_norm": 0.07459737360477448, |
|
"learning_rate": 2.535585262722423e-05, |
|
"loss": 0.0718, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 0.4775446306748819, |
|
"grad_norm": 0.03416149318218231, |
|
"learning_rate": 2.5348636960568788e-05, |
|
"loss": 0.0249, |
|
"step": 33250 |
|
}, |
|
{ |
|
"epoch": 0.47826274290146065, |
|
"grad_norm": 0.05686436593532562, |
|
"learning_rate": 2.5341421293913347e-05, |
|
"loss": 0.0714, |
|
"step": 33300 |
|
}, |
|
{ |
|
"epoch": 0.4789808551280394, |
|
"grad_norm": 0.1602715402841568, |
|
"learning_rate": 2.5334205627257902e-05, |
|
"loss": 0.0463, |
|
"step": 33350 |
|
}, |
|
{ |
|
"epoch": 0.4796989673546182, |
|
"grad_norm": 0.05453362315893173, |
|
"learning_rate": 2.532698996060246e-05, |
|
"loss": 0.0676, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 0.48041707958119695, |
|
"grad_norm": 4.763365268707275, |
|
"learning_rate": 2.531977429394702e-05, |
|
"loss": 0.0461, |
|
"step": 33450 |
|
}, |
|
{ |
|
"epoch": 0.4811351918077757, |
|
"grad_norm": 0.06556567549705505, |
|
"learning_rate": 2.5312558627291575e-05, |
|
"loss": 0.0432, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.4818533040343545, |
|
"grad_norm": 0.05367936193943024, |
|
"learning_rate": 2.5305342960636137e-05, |
|
"loss": 0.0536, |
|
"step": 33550 |
|
}, |
|
{ |
|
"epoch": 0.48257141626093325, |
|
"grad_norm": 0.7342302203178406, |
|
"learning_rate": 2.5298127293980692e-05, |
|
"loss": 0.1082, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 0.48328952848751205, |
|
"grad_norm": 1.5394067764282227, |
|
"learning_rate": 2.5290911627325247e-05, |
|
"loss": 0.0691, |
|
"step": 33650 |
|
}, |
|
{ |
|
"epoch": 0.4840076407140908, |
|
"grad_norm": 0.10191819071769714, |
|
"learning_rate": 2.528369596066981e-05, |
|
"loss": 0.0616, |
|
"step": 33700 |
|
}, |
|
{ |
|
"epoch": 0.48472575294066955, |
|
"grad_norm": 0.125881627202034, |
|
"learning_rate": 2.5276480294014365e-05, |
|
"loss": 0.0565, |
|
"step": 33750 |
|
}, |
|
{ |
|
"epoch": 0.48544386516724836, |
|
"grad_norm": 0.12268608063459396, |
|
"learning_rate": 2.526926462735892e-05, |
|
"loss": 0.0535, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 0.4861619773938271, |
|
"grad_norm": 0.5251419544219971, |
|
"learning_rate": 2.5262048960703482e-05, |
|
"loss": 0.0327, |
|
"step": 33850 |
|
}, |
|
{ |
|
"epoch": 0.48688008962040585, |
|
"grad_norm": 0.049022432416677475, |
|
"learning_rate": 2.5254833294048038e-05, |
|
"loss": 0.0477, |
|
"step": 33900 |
|
}, |
|
{ |
|
"epoch": 0.48759820184698466, |
|
"grad_norm": 17.507314682006836, |
|
"learning_rate": 2.5247617627392593e-05, |
|
"loss": 0.0725, |
|
"step": 33950 |
|
}, |
|
{ |
|
"epoch": 0.4883163140735634, |
|
"grad_norm": 0.5622566938400269, |
|
"learning_rate": 2.5240401960737155e-05, |
|
"loss": 0.0539, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.4890344263001422, |
|
"grad_norm": 0.04837973043322563, |
|
"learning_rate": 2.523318629408171e-05, |
|
"loss": 0.0582, |
|
"step": 34050 |
|
}, |
|
{ |
|
"epoch": 0.48975253852672096, |
|
"grad_norm": 0.5979802012443542, |
|
"learning_rate": 2.5225970627426266e-05, |
|
"loss": 0.0311, |
|
"step": 34100 |
|
}, |
|
{ |
|
"epoch": 0.4904706507532997, |
|
"grad_norm": 0.05467730388045311, |
|
"learning_rate": 2.5218754960770828e-05, |
|
"loss": 0.0395, |
|
"step": 34150 |
|
}, |
|
{ |
|
"epoch": 0.4911887629798785, |
|
"grad_norm": 0.0720251053571701, |
|
"learning_rate": 2.5211539294115383e-05, |
|
"loss": 0.0321, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 0.49190687520645726, |
|
"grad_norm": 0.05227138102054596, |
|
"learning_rate": 2.5204323627459942e-05, |
|
"loss": 0.0493, |
|
"step": 34250 |
|
}, |
|
{ |
|
"epoch": 0.492624987433036, |
|
"grad_norm": 0.04734648019075394, |
|
"learning_rate": 2.51971079608045e-05, |
|
"loss": 0.0359, |
|
"step": 34300 |
|
}, |
|
{ |
|
"epoch": 0.4933430996596148, |
|
"grad_norm": 0.2445848435163498, |
|
"learning_rate": 2.5189892294149056e-05, |
|
"loss": 0.0218, |
|
"step": 34350 |
|
}, |
|
{ |
|
"epoch": 0.49406121188619356, |
|
"grad_norm": 1.6729121208190918, |
|
"learning_rate": 2.5182676627493615e-05, |
|
"loss": 0.0672, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 0.49477932411277237, |
|
"grad_norm": 0.0916346088051796, |
|
"learning_rate": 2.5175460960838173e-05, |
|
"loss": 0.0729, |
|
"step": 34450 |
|
}, |
|
{ |
|
"epoch": 0.4954974363393511, |
|
"grad_norm": 0.07457554340362549, |
|
"learning_rate": 2.516824529418273e-05, |
|
"loss": 0.0527, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.49621554856592986, |
|
"grad_norm": 0.9593037962913513, |
|
"learning_rate": 2.5161029627527287e-05, |
|
"loss": 0.0551, |
|
"step": 34550 |
|
}, |
|
{ |
|
"epoch": 0.49693366079250867, |
|
"grad_norm": 0.07710795849561691, |
|
"learning_rate": 2.5153813960871846e-05, |
|
"loss": 0.0708, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 0.4976517730190874, |
|
"grad_norm": 1.506896734237671, |
|
"learning_rate": 2.51465982942164e-05, |
|
"loss": 0.074, |
|
"step": 34650 |
|
}, |
|
{ |
|
"epoch": 0.49836988524566617, |
|
"grad_norm": 0.15761396288871765, |
|
"learning_rate": 2.513938262756096e-05, |
|
"loss": 0.0563, |
|
"step": 34700 |
|
}, |
|
{ |
|
"epoch": 0.49908799747224497, |
|
"grad_norm": 0.3991515636444092, |
|
"learning_rate": 2.513216696090552e-05, |
|
"loss": 0.0796, |
|
"step": 34750 |
|
}, |
|
{ |
|
"epoch": 0.4998061096988237, |
|
"grad_norm": 0.10450137406587601, |
|
"learning_rate": 2.5124951294250074e-05, |
|
"loss": 0.0555, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 0.5005242219254025, |
|
"grad_norm": 0.0935065820813179, |
|
"learning_rate": 2.5117735627594633e-05, |
|
"loss": 0.0681, |
|
"step": 34850 |
|
}, |
|
{ |
|
"epoch": 0.5012423341519813, |
|
"grad_norm": 0.2559933066368103, |
|
"learning_rate": 2.5110519960939192e-05, |
|
"loss": 0.0315, |
|
"step": 34900 |
|
}, |
|
{ |
|
"epoch": 0.5019604463785601, |
|
"grad_norm": 0.06852543354034424, |
|
"learning_rate": 2.510330429428375e-05, |
|
"loss": 0.0636, |
|
"step": 34950 |
|
}, |
|
{ |
|
"epoch": 0.5026785586051388, |
|
"grad_norm": 71.8512191772461, |
|
"learning_rate": 2.5096088627628306e-05, |
|
"loss": 0.0565, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.5033966708317176, |
|
"grad_norm": 3.9660584926605225, |
|
"learning_rate": 2.5088872960972865e-05, |
|
"loss": 0.078, |
|
"step": 35050 |
|
}, |
|
{ |
|
"epoch": 0.5041147830582964, |
|
"grad_norm": 0.08855576068162918, |
|
"learning_rate": 2.5081657294317423e-05, |
|
"loss": 0.0274, |
|
"step": 35100 |
|
}, |
|
{ |
|
"epoch": 0.5048328952848751, |
|
"grad_norm": 0.206807479262352, |
|
"learning_rate": 2.5074441627661982e-05, |
|
"loss": 0.0572, |
|
"step": 35150 |
|
}, |
|
{ |
|
"epoch": 0.5055510075114539, |
|
"grad_norm": 0.07930755615234375, |
|
"learning_rate": 2.5067225961006537e-05, |
|
"loss": 0.0958, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 0.5062691197380327, |
|
"grad_norm": 0.08441215008497238, |
|
"learning_rate": 2.5060010294351096e-05, |
|
"loss": 0.0497, |
|
"step": 35250 |
|
}, |
|
{ |
|
"epoch": 0.5069872319646115, |
|
"grad_norm": 0.11579915881156921, |
|
"learning_rate": 2.5052794627695655e-05, |
|
"loss": 0.0633, |
|
"step": 35300 |
|
}, |
|
{ |
|
"epoch": 0.5077053441911902, |
|
"grad_norm": 0.07134761661291122, |
|
"learning_rate": 2.504557896104021e-05, |
|
"loss": 0.0241, |
|
"step": 35350 |
|
}, |
|
{ |
|
"epoch": 0.508423456417769, |
|
"grad_norm": 0.15993690490722656, |
|
"learning_rate": 2.503836329438477e-05, |
|
"loss": 0.0801, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 0.5091415686443478, |
|
"grad_norm": 0.24976429343223572, |
|
"learning_rate": 2.5031147627729328e-05, |
|
"loss": 0.0803, |
|
"step": 35450 |
|
}, |
|
{ |
|
"epoch": 0.5098596808709265, |
|
"grad_norm": 4.378284454345703, |
|
"learning_rate": 2.5023931961073883e-05, |
|
"loss": 0.0364, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.5105777930975053, |
|
"grad_norm": 0.07404322922229767, |
|
"learning_rate": 2.501671629441844e-05, |
|
"loss": 0.0604, |
|
"step": 35550 |
|
}, |
|
{ |
|
"epoch": 0.5112959053240841, |
|
"grad_norm": 0.060134898871183395, |
|
"learning_rate": 2.5009500627763e-05, |
|
"loss": 0.0371, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 0.5120140175506628, |
|
"grad_norm": 0.2887961268424988, |
|
"learning_rate": 2.500228496110756e-05, |
|
"loss": 0.1102, |
|
"step": 35650 |
|
}, |
|
{ |
|
"epoch": 0.5127321297772416, |
|
"grad_norm": 53.177459716796875, |
|
"learning_rate": 2.4995069294452114e-05, |
|
"loss": 0.0537, |
|
"step": 35700 |
|
}, |
|
{ |
|
"epoch": 0.5134502420038204, |
|
"grad_norm": 3.1034953594207764, |
|
"learning_rate": 2.4987853627796673e-05, |
|
"loss": 0.071, |
|
"step": 35750 |
|
}, |
|
{ |
|
"epoch": 0.5141683542303991, |
|
"grad_norm": 34.12720489501953, |
|
"learning_rate": 2.4980637961141232e-05, |
|
"loss": 0.1069, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 0.5148864664569779, |
|
"grad_norm": 0.102497898042202, |
|
"learning_rate": 2.4973422294485787e-05, |
|
"loss": 0.0722, |
|
"step": 35850 |
|
}, |
|
{ |
|
"epoch": 0.5156045786835567, |
|
"grad_norm": 1164.751953125, |
|
"learning_rate": 2.4966206627830346e-05, |
|
"loss": 0.105, |
|
"step": 35900 |
|
}, |
|
{ |
|
"epoch": 0.5163226909101354, |
|
"grad_norm": 48.3822021484375, |
|
"learning_rate": 2.4958990961174905e-05, |
|
"loss": 0.1142, |
|
"step": 35950 |
|
}, |
|
{ |
|
"epoch": 0.5170408031367142, |
|
"grad_norm": 0.11598493903875351, |
|
"learning_rate": 2.495177529451946e-05, |
|
"loss": 0.076, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.517758915363293, |
|
"grad_norm": 16.16822052001953, |
|
"learning_rate": 2.494455962786402e-05, |
|
"loss": 0.0823, |
|
"step": 36050 |
|
}, |
|
{ |
|
"epoch": 0.5184770275898718, |
|
"grad_norm": 1.6366539001464844, |
|
"learning_rate": 2.4937343961208577e-05, |
|
"loss": 0.0503, |
|
"step": 36100 |
|
}, |
|
{ |
|
"epoch": 0.5191951398164505, |
|
"grad_norm": 3.8799993991851807, |
|
"learning_rate": 2.4930128294553133e-05, |
|
"loss": 0.0792, |
|
"step": 36150 |
|
}, |
|
{ |
|
"epoch": 0.5199132520430293, |
|
"grad_norm": 2.2162089347839355, |
|
"learning_rate": 2.4922912627897695e-05, |
|
"loss": 0.0683, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 0.5206313642696081, |
|
"grad_norm": 63.66940689086914, |
|
"learning_rate": 2.491569696124225e-05, |
|
"loss": 0.0728, |
|
"step": 36250 |
|
}, |
|
{ |
|
"epoch": 0.5213494764961868, |
|
"grad_norm": 1.301426887512207, |
|
"learning_rate": 2.4908481294586806e-05, |
|
"loss": 0.0623, |
|
"step": 36300 |
|
}, |
|
{ |
|
"epoch": 0.5220675887227656, |
|
"grad_norm": 0.48745834827423096, |
|
"learning_rate": 2.4901265627931368e-05, |
|
"loss": 0.0557, |
|
"step": 36350 |
|
}, |
|
{ |
|
"epoch": 0.5227857009493444, |
|
"grad_norm": 0.7868464589118958, |
|
"learning_rate": 2.4894049961275923e-05, |
|
"loss": 0.0373, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 0.5235038131759231, |
|
"grad_norm": 4.382079601287842, |
|
"learning_rate": 2.488683429462048e-05, |
|
"loss": 0.0625, |
|
"step": 36450 |
|
}, |
|
{ |
|
"epoch": 0.5242219254025019, |
|
"grad_norm": 12.262738227844238, |
|
"learning_rate": 2.487961862796504e-05, |
|
"loss": 0.0645, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.5249400376290807, |
|
"grad_norm": 1.408593773841858, |
|
"learning_rate": 2.4872402961309596e-05, |
|
"loss": 0.0888, |
|
"step": 36550 |
|
}, |
|
{ |
|
"epoch": 0.5256581498556594, |
|
"grad_norm": 0.11477218568325043, |
|
"learning_rate": 2.486518729465415e-05, |
|
"loss": 0.0614, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 0.5263762620822382, |
|
"grad_norm": 0.086027592420578, |
|
"learning_rate": 2.4857971627998713e-05, |
|
"loss": 0.0485, |
|
"step": 36650 |
|
}, |
|
{ |
|
"epoch": 0.527094374308817, |
|
"grad_norm": 0.03868116810917854, |
|
"learning_rate": 2.485075596134327e-05, |
|
"loss": 0.0707, |
|
"step": 36700 |
|
}, |
|
{ |
|
"epoch": 0.5278124865353957, |
|
"grad_norm": 0.06335175037384033, |
|
"learning_rate": 2.4843540294687824e-05, |
|
"loss": 0.0476, |
|
"step": 36750 |
|
}, |
|
{ |
|
"epoch": 0.5285305987619745, |
|
"grad_norm": 0.36757639050483704, |
|
"learning_rate": 2.4836324628032386e-05, |
|
"loss": 0.0448, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 0.5292487109885533, |
|
"grad_norm": 0.03587363660335541, |
|
"learning_rate": 2.482910896137694e-05, |
|
"loss": 0.0507, |
|
"step": 36850 |
|
}, |
|
{ |
|
"epoch": 0.5299668232151321, |
|
"grad_norm": 3.4281718730926514, |
|
"learning_rate": 2.48218932947215e-05, |
|
"loss": 0.0674, |
|
"step": 36900 |
|
}, |
|
{ |
|
"epoch": 0.5306849354417108, |
|
"grad_norm": 2.3952221870422363, |
|
"learning_rate": 2.481467762806606e-05, |
|
"loss": 0.0721, |
|
"step": 36950 |
|
}, |
|
{ |
|
"epoch": 0.5314030476682896, |
|
"grad_norm": 0.034354522824287415, |
|
"learning_rate": 2.4807461961410614e-05, |
|
"loss": 0.0834, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.5321211598948684, |
|
"grad_norm": 1.9441754817962646, |
|
"learning_rate": 2.4800246294755173e-05, |
|
"loss": 0.1208, |
|
"step": 37050 |
|
}, |
|
{ |
|
"epoch": 0.5328392721214471, |
|
"grad_norm": 15.068452835083008, |
|
"learning_rate": 2.479303062809973e-05, |
|
"loss": 0.0937, |
|
"step": 37100 |
|
}, |
|
{ |
|
"epoch": 0.5335573843480259, |
|
"grad_norm": 0.0747528076171875, |
|
"learning_rate": 2.4785814961444287e-05, |
|
"loss": 0.0299, |
|
"step": 37150 |
|
}, |
|
{ |
|
"epoch": 0.5342754965746047, |
|
"grad_norm": 0.04734047129750252, |
|
"learning_rate": 2.477859929478885e-05, |
|
"loss": 0.0448, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 0.5349936088011834, |
|
"grad_norm": 0.04633970558643341, |
|
"learning_rate": 2.4771383628133404e-05, |
|
"loss": 0.0488, |
|
"step": 37250 |
|
}, |
|
{ |
|
"epoch": 0.5357117210277622, |
|
"grad_norm": 0.2701416611671448, |
|
"learning_rate": 2.476416796147796e-05, |
|
"loss": 0.0407, |
|
"step": 37300 |
|
}, |
|
{ |
|
"epoch": 0.536429833254341, |
|
"grad_norm": 3592.864501953125, |
|
"learning_rate": 2.4756952294822522e-05, |
|
"loss": 0.0573, |
|
"step": 37350 |
|
}, |
|
{ |
|
"epoch": 0.5371479454809197, |
|
"grad_norm": 0.03521255776286125, |
|
"learning_rate": 2.4749736628167077e-05, |
|
"loss": 0.0265, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 0.5378660577074985, |
|
"grad_norm": 0.056420013308525085, |
|
"learning_rate": 2.4742520961511632e-05, |
|
"loss": 0.0249, |
|
"step": 37450 |
|
}, |
|
{ |
|
"epoch": 0.5385841699340773, |
|
"grad_norm": 2.5189876556396484, |
|
"learning_rate": 2.4735305294856195e-05, |
|
"loss": 0.064, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 0.5393022821606561, |
|
"grad_norm": 3.7576839923858643, |
|
"learning_rate": 2.472808962820075e-05, |
|
"loss": 0.0922, |
|
"step": 37550 |
|
}, |
|
{ |
|
"epoch": 0.5400203943872348, |
|
"grad_norm": 0.048396721482276917, |
|
"learning_rate": 2.472087396154531e-05, |
|
"loss": 0.0637, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 0.5407385066138136, |
|
"grad_norm": 0.1982671469449997, |
|
"learning_rate": 2.4713658294889867e-05, |
|
"loss": 0.0396, |
|
"step": 37650 |
|
}, |
|
{ |
|
"epoch": 0.5414566188403924, |
|
"grad_norm": 1.0207908153533936, |
|
"learning_rate": 2.4706442628234423e-05, |
|
"loss": 0.0304, |
|
"step": 37700 |
|
}, |
|
{ |
|
"epoch": 0.5421747310669711, |
|
"grad_norm": 0.02437562867999077, |
|
"learning_rate": 2.469922696157898e-05, |
|
"loss": 0.0334, |
|
"step": 37750 |
|
}, |
|
{ |
|
"epoch": 0.5428928432935499, |
|
"grad_norm": 1.7860028743743896, |
|
"learning_rate": 2.469201129492354e-05, |
|
"loss": 0.0539, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 0.5436109555201287, |
|
"grad_norm": 0.15321147441864014, |
|
"learning_rate": 2.4684795628268095e-05, |
|
"loss": 0.0629, |
|
"step": 37850 |
|
}, |
|
{ |
|
"epoch": 0.5443290677467074, |
|
"grad_norm": 0.15857814252376556, |
|
"learning_rate": 2.4677579961612654e-05, |
|
"loss": 0.0685, |
|
"step": 37900 |
|
}, |
|
{ |
|
"epoch": 0.5450471799732862, |
|
"grad_norm": 0.031938180327415466, |
|
"learning_rate": 2.4670364294957213e-05, |
|
"loss": 0.0584, |
|
"step": 37950 |
|
}, |
|
{ |
|
"epoch": 0.545765292199865, |
|
"grad_norm": 0.07411850243806839, |
|
"learning_rate": 2.4663148628301768e-05, |
|
"loss": 0.0442, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.5464834044264437, |
|
"grad_norm": 2.765964984893799, |
|
"learning_rate": 2.4655932961646327e-05, |
|
"loss": 0.0919, |
|
"step": 38050 |
|
}, |
|
{ |
|
"epoch": 0.5472015166530225, |
|
"grad_norm": 4.264487266540527, |
|
"learning_rate": 2.4648717294990886e-05, |
|
"loss": 0.0493, |
|
"step": 38100 |
|
}, |
|
{ |
|
"epoch": 0.5479196288796013, |
|
"grad_norm": 6.618762969970703, |
|
"learning_rate": 2.464150162833544e-05, |
|
"loss": 0.0318, |
|
"step": 38150 |
|
}, |
|
{ |
|
"epoch": 0.54863774110618, |
|
"grad_norm": 2.5328927040100098, |
|
"learning_rate": 2.463428596168e-05, |
|
"loss": 0.0643, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 0.5493558533327588, |
|
"grad_norm": 0.025601988658308983, |
|
"learning_rate": 2.462707029502456e-05, |
|
"loss": 0.069, |
|
"step": 38250 |
|
}, |
|
{ |
|
"epoch": 0.5500739655593376, |
|
"grad_norm": 0.07905403524637222, |
|
"learning_rate": 2.4619854628369117e-05, |
|
"loss": 0.068, |
|
"step": 38300 |
|
}, |
|
{ |
|
"epoch": 0.5507920777859164, |
|
"grad_norm": 0.034113090485334396, |
|
"learning_rate": 2.4612638961713673e-05, |
|
"loss": 0.0451, |
|
"step": 38350 |
|
}, |
|
{ |
|
"epoch": 0.5515101900124951, |
|
"grad_norm": 109.80461883544922, |
|
"learning_rate": 2.460542329505823e-05, |
|
"loss": 0.0712, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 0.5522283022390739, |
|
"grad_norm": 0.12972399592399597, |
|
"learning_rate": 2.459820762840279e-05, |
|
"loss": 0.042, |
|
"step": 38450 |
|
}, |
|
{ |
|
"epoch": 0.5529464144656527, |
|
"grad_norm": 0.1377432644367218, |
|
"learning_rate": 2.4590991961747345e-05, |
|
"loss": 0.0359, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 0.5536645266922314, |
|
"grad_norm": 0.021726874634623528, |
|
"learning_rate": 2.4583776295091904e-05, |
|
"loss": 0.0328, |
|
"step": 38550 |
|
}, |
|
{ |
|
"epoch": 0.5543826389188102, |
|
"grad_norm": 0.020801221951842308, |
|
"learning_rate": 2.4576560628436463e-05, |
|
"loss": 0.0628, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 0.555100751145389, |
|
"grad_norm": 0.08038417994976044, |
|
"learning_rate": 2.4569344961781018e-05, |
|
"loss": 0.0351, |
|
"step": 38650 |
|
}, |
|
{ |
|
"epoch": 0.5558188633719677, |
|
"grad_norm": 18.516040802001953, |
|
"learning_rate": 2.4562129295125577e-05, |
|
"loss": 0.0494, |
|
"step": 38700 |
|
}, |
|
{ |
|
"epoch": 0.5565369755985465, |
|
"grad_norm": 0.055549539625644684, |
|
"learning_rate": 2.4554913628470136e-05, |
|
"loss": 0.06, |
|
"step": 38750 |
|
}, |
|
{ |
|
"epoch": 0.5572550878251253, |
|
"grad_norm": 0.03651417791843414, |
|
"learning_rate": 2.454769796181469e-05, |
|
"loss": 0.04, |
|
"step": 38800 |
|
}, |
|
{ |
|
"epoch": 0.557973200051704, |
|
"grad_norm": 0.015556145459413528, |
|
"learning_rate": 2.454048229515925e-05, |
|
"loss": 0.0434, |
|
"step": 38850 |
|
}, |
|
{ |
|
"epoch": 0.5586913122782828, |
|
"grad_norm": 0.011846818961203098, |
|
"learning_rate": 2.4533266628503808e-05, |
|
"loss": 0.0419, |
|
"step": 38900 |
|
}, |
|
{ |
|
"epoch": 0.5594094245048616, |
|
"grad_norm": 0.011472227051854134, |
|
"learning_rate": 2.4526050961848364e-05, |
|
"loss": 0.0197, |
|
"step": 38950 |
|
}, |
|
{ |
|
"epoch": 0.5601275367314403, |
|
"grad_norm": 0.028384173288941383, |
|
"learning_rate": 2.4518835295192926e-05, |
|
"loss": 0.0454, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.5608456489580191, |
|
"grad_norm": 0.021673662588000298, |
|
"learning_rate": 2.451161962853748e-05, |
|
"loss": 0.039, |
|
"step": 39050 |
|
}, |
|
{ |
|
"epoch": 0.5615637611845979, |
|
"grad_norm": 0.01948692835867405, |
|
"learning_rate": 2.450440396188204e-05, |
|
"loss": 0.0446, |
|
"step": 39100 |
|
}, |
|
{ |
|
"epoch": 0.5622818734111767, |
|
"grad_norm": 0.011734464205801487, |
|
"learning_rate": 2.44971882952266e-05, |
|
"loss": 0.0187, |
|
"step": 39150 |
|
}, |
|
{ |
|
"epoch": 0.5629999856377554, |
|
"grad_norm": 19.443992614746094, |
|
"learning_rate": 2.4489972628571154e-05, |
|
"loss": 0.0651, |
|
"step": 39200 |
|
}, |
|
{ |
|
"epoch": 0.5637180978643342, |
|
"grad_norm": 0.06333588063716888, |
|
"learning_rate": 2.4482756961915713e-05, |
|
"loss": 0.0602, |
|
"step": 39250 |
|
}, |
|
{ |
|
"epoch": 0.564436210090913, |
|
"grad_norm": 0.03405553475022316, |
|
"learning_rate": 2.447554129526027e-05, |
|
"loss": 0.0383, |
|
"step": 39300 |
|
}, |
|
{ |
|
"epoch": 0.5651543223174917, |
|
"grad_norm": 4.403933048248291, |
|
"learning_rate": 2.4468325628604827e-05, |
|
"loss": 0.0364, |
|
"step": 39350 |
|
}, |
|
{ |
|
"epoch": 0.5658724345440705, |
|
"grad_norm": 45.96012496948242, |
|
"learning_rate": 2.4461109961949385e-05, |
|
"loss": 0.0267, |
|
"step": 39400 |
|
}, |
|
{ |
|
"epoch": 0.5665905467706494, |
|
"grad_norm": 0.03902297466993332, |
|
"learning_rate": 2.4453894295293944e-05, |
|
"loss": 0.0364, |
|
"step": 39450 |
|
}, |
|
{ |
|
"epoch": 0.567308658997228, |
|
"grad_norm": 0.012141493149101734, |
|
"learning_rate": 2.44466786286385e-05, |
|
"loss": 0.0419, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 0.5680267712238068, |
|
"grad_norm": 32.401885986328125, |
|
"learning_rate": 2.443960727531617e-05, |
|
"loss": 0.0508, |
|
"step": 39550 |
|
}, |
|
{ |
|
"epoch": 0.5687448834503857, |
|
"grad_norm": 9.376387596130371, |
|
"learning_rate": 2.4432391608660724e-05, |
|
"loss": 0.0529, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 0.5694629956769643, |
|
"grad_norm": 5.138739585876465, |
|
"learning_rate": 2.4425175942005283e-05, |
|
"loss": 0.042, |
|
"step": 39650 |
|
}, |
|
{ |
|
"epoch": 0.5701811079035431, |
|
"grad_norm": 0.014239504933357239, |
|
"learning_rate": 2.441796027534984e-05, |
|
"loss": 0.0397, |
|
"step": 39700 |
|
}, |
|
{ |
|
"epoch": 0.570899220130122, |
|
"grad_norm": 0.016275152564048767, |
|
"learning_rate": 2.4410744608694397e-05, |
|
"loss": 0.0516, |
|
"step": 39750 |
|
}, |
|
{ |
|
"epoch": 0.5716173323567008, |
|
"grad_norm": 0.03984437137842178, |
|
"learning_rate": 2.4403528942038955e-05, |
|
"loss": 0.0502, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 0.5723354445832795, |
|
"grad_norm": 0.817456066608429, |
|
"learning_rate": 2.4396313275383514e-05, |
|
"loss": 0.0471, |
|
"step": 39850 |
|
}, |
|
{ |
|
"epoch": 0.5730535568098583, |
|
"grad_norm": 0.42270442843437195, |
|
"learning_rate": 2.4389097608728073e-05, |
|
"loss": 0.0527, |
|
"step": 39900 |
|
}, |
|
{ |
|
"epoch": 0.5737716690364371, |
|
"grad_norm": 0.029552064836025238, |
|
"learning_rate": 2.4381881942072628e-05, |
|
"loss": 0.0526, |
|
"step": 39950 |
|
}, |
|
{ |
|
"epoch": 0.5744897812630158, |
|
"grad_norm": 21.022872924804688, |
|
"learning_rate": 2.4374666275417187e-05, |
|
"loss": 0.053, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.5752078934895946, |
|
"grad_norm": 0.0365062914788723, |
|
"learning_rate": 2.4367450608761746e-05, |
|
"loss": 0.0596, |
|
"step": 40050 |
|
}, |
|
{ |
|
"epoch": 0.5759260057161734, |
|
"grad_norm": 0.025358131155371666, |
|
"learning_rate": 2.43602349421063e-05, |
|
"loss": 0.0578, |
|
"step": 40100 |
|
}, |
|
{ |
|
"epoch": 0.576644117942752, |
|
"grad_norm": 0.033615849912166595, |
|
"learning_rate": 2.435301927545086e-05, |
|
"loss": 0.0245, |
|
"step": 40150 |
|
}, |
|
{ |
|
"epoch": 0.5773622301693309, |
|
"grad_norm": 0.030600300058722496, |
|
"learning_rate": 2.434580360879542e-05, |
|
"loss": 0.0596, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 0.5780803423959097, |
|
"grad_norm": 0.04510806128382683, |
|
"learning_rate": 2.4338587942139974e-05, |
|
"loss": 0.0435, |
|
"step": 40250 |
|
}, |
|
{ |
|
"epoch": 0.5787984546224884, |
|
"grad_norm": 0.024853043258190155, |
|
"learning_rate": 2.4331372275484532e-05, |
|
"loss": 0.0391, |
|
"step": 40300 |
|
}, |
|
{ |
|
"epoch": 0.5795165668490672, |
|
"grad_norm": 0.07882773131132126, |
|
"learning_rate": 2.432415660882909e-05, |
|
"loss": 0.0566, |
|
"step": 40350 |
|
}, |
|
{ |
|
"epoch": 0.580234679075646, |
|
"grad_norm": 0.04951098933815956, |
|
"learning_rate": 2.4316940942173647e-05, |
|
"loss": 0.0768, |
|
"step": 40400 |
|
}, |
|
{ |
|
"epoch": 0.5809527913022247, |
|
"grad_norm": 0.09358759224414825, |
|
"learning_rate": 2.4309725275518205e-05, |
|
"loss": 0.0828, |
|
"step": 40450 |
|
}, |
|
{ |
|
"epoch": 0.5816709035288035, |
|
"grad_norm": 0.05936616286635399, |
|
"learning_rate": 2.4302509608862764e-05, |
|
"loss": 0.0406, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 0.5823890157553823, |
|
"grad_norm": 0.03657994046807289, |
|
"learning_rate": 2.429543825554043e-05, |
|
"loss": 0.0516, |
|
"step": 40550 |
|
}, |
|
{ |
|
"epoch": 0.5831071279819611, |
|
"grad_norm": 8.627974510192871, |
|
"learning_rate": 2.428822258888499e-05, |
|
"loss": 0.0561, |
|
"step": 40600 |
|
}, |
|
{ |
|
"epoch": 0.5838252402085398, |
|
"grad_norm": 1.1441830396652222, |
|
"learning_rate": 2.4281006922229544e-05, |
|
"loss": 0.0463, |
|
"step": 40650 |
|
}, |
|
{ |
|
"epoch": 0.5845433524351186, |
|
"grad_norm": 0.07954325526952744, |
|
"learning_rate": 2.4273791255574103e-05, |
|
"loss": 0.0922, |
|
"step": 40700 |
|
}, |
|
{ |
|
"epoch": 0.5852614646616974, |
|
"grad_norm": 2.8461921215057373, |
|
"learning_rate": 2.426657558891866e-05, |
|
"loss": 0.0852, |
|
"step": 40750 |
|
}, |
|
{ |
|
"epoch": 0.5859795768882761, |
|
"grad_norm": 0.10721123218536377, |
|
"learning_rate": 2.425935992226322e-05, |
|
"loss": 0.0508, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 0.5866976891148549, |
|
"grad_norm": 0.0931866392493248, |
|
"learning_rate": 2.4252288568940882e-05, |
|
"loss": 0.0788, |
|
"step": 40850 |
|
}, |
|
{ |
|
"epoch": 0.5874158013414337, |
|
"grad_norm": 0.061198893934488297, |
|
"learning_rate": 2.4245072902285445e-05, |
|
"loss": 0.0379, |
|
"step": 40900 |
|
}, |
|
{ |
|
"epoch": 0.5881339135680124, |
|
"grad_norm": 3.5173628330230713, |
|
"learning_rate": 2.423785723563e-05, |
|
"loss": 0.1242, |
|
"step": 40950 |
|
}, |
|
{ |
|
"epoch": 0.5888520257945912, |
|
"grad_norm": 2.4062447547912598, |
|
"learning_rate": 2.423064156897456e-05, |
|
"loss": 0.0625, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.58957013802117, |
|
"grad_norm": 31.462963104248047, |
|
"learning_rate": 2.4223425902319117e-05, |
|
"loss": 0.0261, |
|
"step": 41050 |
|
}, |
|
{ |
|
"epoch": 0.5902882502477487, |
|
"grad_norm": 0.02886788733303547, |
|
"learning_rate": 2.4216210235663673e-05, |
|
"loss": 0.0291, |
|
"step": 41100 |
|
}, |
|
{ |
|
"epoch": 0.5910063624743275, |
|
"grad_norm": 0.0616585947573185, |
|
"learning_rate": 2.420899456900823e-05, |
|
"loss": 0.0408, |
|
"step": 41150 |
|
}, |
|
{ |
|
"epoch": 0.5917244747009063, |
|
"grad_norm": 0.048470042645931244, |
|
"learning_rate": 2.420177890235279e-05, |
|
"loss": 0.0297, |
|
"step": 41200 |
|
}, |
|
{ |
|
"epoch": 0.5924425869274851, |
|
"grad_norm": 10.751079559326172, |
|
"learning_rate": 2.4194563235697345e-05, |
|
"loss": 0.0538, |
|
"step": 41250 |
|
}, |
|
{ |
|
"epoch": 0.5931606991540638, |
|
"grad_norm": 2.543215274810791, |
|
"learning_rate": 2.4187347569041904e-05, |
|
"loss": 0.0796, |
|
"step": 41300 |
|
}, |
|
{ |
|
"epoch": 0.5938788113806426, |
|
"grad_norm": 0.0643203854560852, |
|
"learning_rate": 2.4180131902386463e-05, |
|
"loss": 0.0278, |
|
"step": 41350 |
|
}, |
|
{ |
|
"epoch": 0.5945969236072214, |
|
"grad_norm": 0.11458639800548553, |
|
"learning_rate": 2.4172916235731018e-05, |
|
"loss": 0.0531, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 0.5953150358338001, |
|
"grad_norm": 0.14419472217559814, |
|
"learning_rate": 2.4165700569075577e-05, |
|
"loss": 0.0717, |
|
"step": 41450 |
|
}, |
|
{ |
|
"epoch": 0.5960331480603789, |
|
"grad_norm": 84.442138671875, |
|
"learning_rate": 2.4158484902420136e-05, |
|
"loss": 0.0399, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 0.5967512602869577, |
|
"grad_norm": 0.08792130649089813, |
|
"learning_rate": 2.415126923576469e-05, |
|
"loss": 0.0632, |
|
"step": 41550 |
|
}, |
|
{ |
|
"epoch": 0.5974693725135364, |
|
"grad_norm": 0.21101626753807068, |
|
"learning_rate": 2.414405356910925e-05, |
|
"loss": 0.0574, |
|
"step": 41600 |
|
}, |
|
{ |
|
"epoch": 0.5981874847401152, |
|
"grad_norm": 0.052498213946819305, |
|
"learning_rate": 2.413698221578692e-05, |
|
"loss": 0.0439, |
|
"step": 41650 |
|
}, |
|
{ |
|
"epoch": 0.598905596966694, |
|
"grad_norm": 0.3066045641899109, |
|
"learning_rate": 2.4129766549131474e-05, |
|
"loss": 0.0677, |
|
"step": 41700 |
|
}, |
|
{ |
|
"epoch": 0.5996237091932727, |
|
"grad_norm": 0.0755561888217926, |
|
"learning_rate": 2.412255088247603e-05, |
|
"loss": 0.0518, |
|
"step": 41750 |
|
}, |
|
{ |
|
"epoch": 0.6003418214198515, |
|
"grad_norm": 0.07415340840816498, |
|
"learning_rate": 2.411533521582059e-05, |
|
"loss": 0.0503, |
|
"step": 41800 |
|
}, |
|
{ |
|
"epoch": 0.6010599336464303, |
|
"grad_norm": 0.15020149946212769, |
|
"learning_rate": 2.4108119549165147e-05, |
|
"loss": 0.0817, |
|
"step": 41850 |
|
}, |
|
{ |
|
"epoch": 0.601778045873009, |
|
"grad_norm": 0.06178657338023186, |
|
"learning_rate": 2.4100903882509706e-05, |
|
"loss": 0.0601, |
|
"step": 41900 |
|
}, |
|
{ |
|
"epoch": 0.6024961580995878, |
|
"grad_norm": 1.5859713554382324, |
|
"learning_rate": 2.4093688215854264e-05, |
|
"loss": 0.0599, |
|
"step": 41950 |
|
}, |
|
{ |
|
"epoch": 0.6032142703261666, |
|
"grad_norm": 1.891103744506836, |
|
"learning_rate": 2.408647254919882e-05, |
|
"loss": 0.065, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.6039323825527454, |
|
"grad_norm": 0.3377256989479065, |
|
"learning_rate": 2.407925688254338e-05, |
|
"loss": 0.0688, |
|
"step": 42050 |
|
}, |
|
{ |
|
"epoch": 0.6046504947793241, |
|
"grad_norm": 16.288217544555664, |
|
"learning_rate": 2.4072041215887937e-05, |
|
"loss": 0.0413, |
|
"step": 42100 |
|
}, |
|
{ |
|
"epoch": 0.6053686070059029, |
|
"grad_norm": 0.11780572682619095, |
|
"learning_rate": 2.4064825549232493e-05, |
|
"loss": 0.0695, |
|
"step": 42150 |
|
}, |
|
{ |
|
"epoch": 0.6060867192324817, |
|
"grad_norm": 11.723837852478027, |
|
"learning_rate": 2.405760988257705e-05, |
|
"loss": 0.0692, |
|
"step": 42200 |
|
}, |
|
{ |
|
"epoch": 0.6068048314590604, |
|
"grad_norm": 0.0517687052488327, |
|
"learning_rate": 2.405039421592161e-05, |
|
"loss": 0.0356, |
|
"step": 42250 |
|
}, |
|
{ |
|
"epoch": 0.6075229436856392, |
|
"grad_norm": 0.06792303919792175, |
|
"learning_rate": 2.4043178549266165e-05, |
|
"loss": 0.0443, |
|
"step": 42300 |
|
}, |
|
{ |
|
"epoch": 0.608241055912218, |
|
"grad_norm": 0.09773588925600052, |
|
"learning_rate": 2.4035962882610727e-05, |
|
"loss": 0.0476, |
|
"step": 42350 |
|
}, |
|
{ |
|
"epoch": 0.6089591681387967, |
|
"grad_norm": 0.03788350522518158, |
|
"learning_rate": 2.4028747215955283e-05, |
|
"loss": 0.0362, |
|
"step": 42400 |
|
}, |
|
{ |
|
"epoch": 0.6096772803653755, |
|
"grad_norm": 0.047992464154958725, |
|
"learning_rate": 2.4021531549299838e-05, |
|
"loss": 0.0679, |
|
"step": 42450 |
|
}, |
|
{ |
|
"epoch": 0.6103953925919543, |
|
"grad_norm": 0.060338061302900314, |
|
"learning_rate": 2.40143158826444e-05, |
|
"loss": 0.052, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 0.611113504818533, |
|
"grad_norm": 0.07789593189954758, |
|
"learning_rate": 2.4007100215988956e-05, |
|
"loss": 0.0203, |
|
"step": 42550 |
|
}, |
|
{ |
|
"epoch": 0.6118316170451118, |
|
"grad_norm": 0.03540048375725746, |
|
"learning_rate": 2.3999884549333514e-05, |
|
"loss": 0.0509, |
|
"step": 42600 |
|
}, |
|
{ |
|
"epoch": 0.6125497292716906, |
|
"grad_norm": 1.3714178800582886, |
|
"learning_rate": 2.3992668882678073e-05, |
|
"loss": 0.0938, |
|
"step": 42650 |
|
}, |
|
{ |
|
"epoch": 0.6132678414982693, |
|
"grad_norm": 0.08646780997514725, |
|
"learning_rate": 2.398545321602263e-05, |
|
"loss": 0.0548, |
|
"step": 42700 |
|
}, |
|
{ |
|
"epoch": 0.6139859537248481, |
|
"grad_norm": 0.10025494545698166, |
|
"learning_rate": 2.3978237549367187e-05, |
|
"loss": 0.0739, |
|
"step": 42750 |
|
}, |
|
{ |
|
"epoch": 0.6147040659514269, |
|
"grad_norm": 18.440555572509766, |
|
"learning_rate": 2.3971021882711746e-05, |
|
"loss": 0.0416, |
|
"step": 42800 |
|
}, |
|
{ |
|
"epoch": 0.6154221781780057, |
|
"grad_norm": 13.679024696350098, |
|
"learning_rate": 2.39638062160563e-05, |
|
"loss": 0.5511, |
|
"step": 42850 |
|
}, |
|
{ |
|
"epoch": 0.6161402904045844, |
|
"grad_norm": 4.185995578765869, |
|
"learning_rate": 2.395659054940086e-05, |
|
"loss": 1.2419, |
|
"step": 42900 |
|
}, |
|
{ |
|
"epoch": 0.6168584026311632, |
|
"grad_norm": 1.135672688484192, |
|
"learning_rate": 2.394937488274542e-05, |
|
"loss": 0.5634, |
|
"step": 42950 |
|
}, |
|
{ |
|
"epoch": 0.617576514857742, |
|
"grad_norm": 2.3285813331604004, |
|
"learning_rate": 2.3942159216089974e-05, |
|
"loss": 0.466, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.6182946270843207, |
|
"grad_norm": 1.1176297664642334, |
|
"learning_rate": 2.3934943549434533e-05, |
|
"loss": 0.0652, |
|
"step": 43050 |
|
}, |
|
{ |
|
"epoch": 0.6190127393108995, |
|
"grad_norm": 51.0164794921875, |
|
"learning_rate": 2.392772788277909e-05, |
|
"loss": 0.0546, |
|
"step": 43100 |
|
}, |
|
{ |
|
"epoch": 0.6197308515374783, |
|
"grad_norm": 0.07210852950811386, |
|
"learning_rate": 2.3920512216123647e-05, |
|
"loss": 0.051, |
|
"step": 43150 |
|
}, |
|
{ |
|
"epoch": 0.620448963764057, |
|
"grad_norm": 0.8415210843086243, |
|
"learning_rate": 2.3913296549468205e-05, |
|
"loss": 0.0681, |
|
"step": 43200 |
|
}, |
|
{ |
|
"epoch": 0.6211670759906358, |
|
"grad_norm": 0.32454317808151245, |
|
"learning_rate": 2.3906080882812764e-05, |
|
"loss": 0.043, |
|
"step": 43250 |
|
}, |
|
{ |
|
"epoch": 0.6218851882172146, |
|
"grad_norm": 0.062497854232788086, |
|
"learning_rate": 2.3898865216157323e-05, |
|
"loss": 0.0698, |
|
"step": 43300 |
|
}, |
|
{ |
|
"epoch": 0.6226033004437933, |
|
"grad_norm": 27.095462799072266, |
|
"learning_rate": 2.3891649549501878e-05, |
|
"loss": 0.0621, |
|
"step": 43350 |
|
}, |
|
{ |
|
"epoch": 0.6233214126703721, |
|
"grad_norm": 0.11430204659700394, |
|
"learning_rate": 2.3884433882846437e-05, |
|
"loss": 0.0679, |
|
"step": 43400 |
|
}, |
|
{ |
|
"epoch": 0.6240395248969509, |
|
"grad_norm": 7.836416244506836, |
|
"learning_rate": 2.3877218216190996e-05, |
|
"loss": 0.068, |
|
"step": 43450 |
|
}, |
|
{ |
|
"epoch": 0.6247576371235297, |
|
"grad_norm": 0.03914704546332359, |
|
"learning_rate": 2.387000254953555e-05, |
|
"loss": 0.0456, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 0.6254757493501084, |
|
"grad_norm": 0.13007937371730804, |
|
"learning_rate": 2.386278688288011e-05, |
|
"loss": 0.0488, |
|
"step": 43550 |
|
}, |
|
{ |
|
"epoch": 0.6261938615766872, |
|
"grad_norm": 0.1002429947257042, |
|
"learning_rate": 2.385557121622467e-05, |
|
"loss": 0.0534, |
|
"step": 43600 |
|
}, |
|
{ |
|
"epoch": 0.626911973803266, |
|
"grad_norm": 0.10558202117681503, |
|
"learning_rate": 2.3848355549569224e-05, |
|
"loss": 0.0921, |
|
"step": 43650 |
|
}, |
|
{ |
|
"epoch": 0.6276300860298447, |
|
"grad_norm": 0.027950316667556763, |
|
"learning_rate": 2.3841139882913782e-05, |
|
"loss": 0.027, |
|
"step": 43700 |
|
}, |
|
{ |
|
"epoch": 0.6283481982564235, |
|
"grad_norm": 0.0834326446056366, |
|
"learning_rate": 2.383392421625834e-05, |
|
"loss": 0.0552, |
|
"step": 43750 |
|
}, |
|
{ |
|
"epoch": 0.6290663104830023, |
|
"grad_norm": 0.03636254370212555, |
|
"learning_rate": 2.3826708549602897e-05, |
|
"loss": 0.0686, |
|
"step": 43800 |
|
}, |
|
{ |
|
"epoch": 0.629784422709581, |
|
"grad_norm": 24.880334854125977, |
|
"learning_rate": 2.3819492882947455e-05, |
|
"loss": 0.0433, |
|
"step": 43850 |
|
}, |
|
{ |
|
"epoch": 0.6305025349361598, |
|
"grad_norm": 0.04653225839138031, |
|
"learning_rate": 2.3812277216292014e-05, |
|
"loss": 0.0343, |
|
"step": 43900 |
|
}, |
|
{ |
|
"epoch": 0.6312206471627386, |
|
"grad_norm": 0.027844512835144997, |
|
"learning_rate": 2.380506154963657e-05, |
|
"loss": 0.0469, |
|
"step": 43950 |
|
}, |
|
{ |
|
"epoch": 0.6319387593893173, |
|
"grad_norm": 0.04555247724056244, |
|
"learning_rate": 2.379784588298113e-05, |
|
"loss": 0.0176, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.6326568716158961, |
|
"grad_norm": 0.06262699514627457, |
|
"learning_rate": 2.3790630216325687e-05, |
|
"loss": 0.0272, |
|
"step": 44050 |
|
}, |
|
{ |
|
"epoch": 0.6333749838424749, |
|
"grad_norm": 0.26737722754478455, |
|
"learning_rate": 2.3783414549670242e-05, |
|
"loss": 0.0445, |
|
"step": 44100 |
|
}, |
|
{ |
|
"epoch": 0.6340930960690536, |
|
"grad_norm": 0.06617012619972229, |
|
"learning_rate": 2.3776198883014804e-05, |
|
"loss": 0.0222, |
|
"step": 44150 |
|
}, |
|
{ |
|
"epoch": 0.6348112082956324, |
|
"grad_norm": 0.02425909787416458, |
|
"learning_rate": 2.376898321635936e-05, |
|
"loss": 0.0522, |
|
"step": 44200 |
|
}, |
|
{ |
|
"epoch": 0.6355293205222112, |
|
"grad_norm": 0.0274038128554821, |
|
"learning_rate": 2.3761767549703918e-05, |
|
"loss": 0.0202, |
|
"step": 44250 |
|
}, |
|
{ |
|
"epoch": 0.63624743274879, |
|
"grad_norm": 9.2883939743042, |
|
"learning_rate": 2.3754551883048477e-05, |
|
"loss": 0.1086, |
|
"step": 44300 |
|
}, |
|
{ |
|
"epoch": 0.6369655449753687, |
|
"grad_norm": 0.02403036691248417, |
|
"learning_rate": 2.3747336216393032e-05, |
|
"loss": 0.0571, |
|
"step": 44350 |
|
}, |
|
{ |
|
"epoch": 0.6376836572019475, |
|
"grad_norm": 2.334660768508911, |
|
"learning_rate": 2.374012054973759e-05, |
|
"loss": 0.0452, |
|
"step": 44400 |
|
}, |
|
{ |
|
"epoch": 0.6384017694285263, |
|
"grad_norm": 0.0580274872481823, |
|
"learning_rate": 2.373290488308215e-05, |
|
"loss": 0.0455, |
|
"step": 44450 |
|
}, |
|
{ |
|
"epoch": 0.639119881655105, |
|
"grad_norm": 0.04195809364318848, |
|
"learning_rate": 2.3725689216426705e-05, |
|
"loss": 0.0381, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 0.6398379938816838, |
|
"grad_norm": 1.6940784454345703, |
|
"learning_rate": 2.3718473549771264e-05, |
|
"loss": 0.0364, |
|
"step": 44550 |
|
}, |
|
{ |
|
"epoch": 0.6405561061082626, |
|
"grad_norm": 0.06187320500612259, |
|
"learning_rate": 2.3711257883115823e-05, |
|
"loss": 0.0816, |
|
"step": 44600 |
|
}, |
|
{ |
|
"epoch": 0.6412742183348413, |
|
"grad_norm": 0.03540629893541336, |
|
"learning_rate": 2.3704042216460378e-05, |
|
"loss": 0.0352, |
|
"step": 44650 |
|
}, |
|
{ |
|
"epoch": 0.6419923305614201, |
|
"grad_norm": 0.06254927068948746, |
|
"learning_rate": 2.369682654980494e-05, |
|
"loss": 0.0472, |
|
"step": 44700 |
|
}, |
|
{ |
|
"epoch": 0.6427104427879989, |
|
"grad_norm": 0.041405342519283295, |
|
"learning_rate": 2.3689610883149495e-05, |
|
"loss": 0.0404, |
|
"step": 44750 |
|
}, |
|
{ |
|
"epoch": 0.6434285550145776, |
|
"grad_norm": 0.03468115255236626, |
|
"learning_rate": 2.368239521649405e-05, |
|
"loss": 0.0436, |
|
"step": 44800 |
|
}, |
|
{ |
|
"epoch": 0.6441466672411564, |
|
"grad_norm": 0.027466176077723503, |
|
"learning_rate": 2.3675179549838613e-05, |
|
"loss": 0.0201, |
|
"step": 44850 |
|
}, |
|
{ |
|
"epoch": 0.6448647794677352, |
|
"grad_norm": 0.024088064208626747, |
|
"learning_rate": 2.3667963883183168e-05, |
|
"loss": 0.0464, |
|
"step": 44900 |
|
}, |
|
{ |
|
"epoch": 0.645582891694314, |
|
"grad_norm": 0.030090169981122017, |
|
"learning_rate": 2.3660748216527723e-05, |
|
"loss": 0.0711, |
|
"step": 44950 |
|
}, |
|
{ |
|
"epoch": 0.6463010039208927, |
|
"grad_norm": 0.041597750037908554, |
|
"learning_rate": 2.3653532549872286e-05, |
|
"loss": 0.0962, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.6470191161474715, |
|
"grad_norm": 0.06742467731237411, |
|
"learning_rate": 2.364631688321684e-05, |
|
"loss": 0.0578, |
|
"step": 45050 |
|
}, |
|
{ |
|
"epoch": 0.6477372283740503, |
|
"grad_norm": 1.151302695274353, |
|
"learning_rate": 2.3639101216561396e-05, |
|
"loss": 0.0372, |
|
"step": 45100 |
|
}, |
|
{ |
|
"epoch": 0.648455340600629, |
|
"grad_norm": 0.024876704439520836, |
|
"learning_rate": 2.363188554990596e-05, |
|
"loss": 0.0394, |
|
"step": 45150 |
|
}, |
|
{ |
|
"epoch": 0.6491734528272078, |
|
"grad_norm": 0.05010320246219635, |
|
"learning_rate": 2.3624669883250514e-05, |
|
"loss": 0.0553, |
|
"step": 45200 |
|
}, |
|
{ |
|
"epoch": 0.6498915650537866, |
|
"grad_norm": 0.03817157447338104, |
|
"learning_rate": 2.361745421659507e-05, |
|
"loss": 0.0474, |
|
"step": 45250 |
|
}, |
|
{ |
|
"epoch": 0.6506096772803653, |
|
"grad_norm": 0.22997663915157318, |
|
"learning_rate": 2.361023854993963e-05, |
|
"loss": 0.0544, |
|
"step": 45300 |
|
}, |
|
{ |
|
"epoch": 0.6513277895069441, |
|
"grad_norm": 0.03478986397385597, |
|
"learning_rate": 2.3603022883284186e-05, |
|
"loss": 0.0403, |
|
"step": 45350 |
|
}, |
|
{ |
|
"epoch": 0.652045901733523, |
|
"grad_norm": 0.02963252365589142, |
|
"learning_rate": 2.3595807216628745e-05, |
|
"loss": 0.0477, |
|
"step": 45400 |
|
}, |
|
{ |
|
"epoch": 0.6527640139601016, |
|
"grad_norm": 0.031180815771222115, |
|
"learning_rate": 2.3588591549973304e-05, |
|
"loss": 0.0466, |
|
"step": 45450 |
|
}, |
|
{ |
|
"epoch": 0.6534821261866804, |
|
"grad_norm": 5.740962982177734, |
|
"learning_rate": 2.358137588331786e-05, |
|
"loss": 0.0608, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 0.6542002384132592, |
|
"grad_norm": 0.03586834669113159, |
|
"learning_rate": 2.3574160216662418e-05, |
|
"loss": 0.0414, |
|
"step": 45550 |
|
}, |
|
{ |
|
"epoch": 0.6549183506398379, |
|
"grad_norm": 0.037541572004556656, |
|
"learning_rate": 2.3566944550006977e-05, |
|
"loss": 0.0365, |
|
"step": 45600 |
|
}, |
|
{ |
|
"epoch": 0.6556364628664167, |
|
"grad_norm": 0.02523988112807274, |
|
"learning_rate": 2.3559728883351532e-05, |
|
"loss": 0.0834, |
|
"step": 45650 |
|
}, |
|
{ |
|
"epoch": 0.6563545750929956, |
|
"grad_norm": 0.09356506913900375, |
|
"learning_rate": 2.355251321669609e-05, |
|
"loss": 0.0641, |
|
"step": 45700 |
|
}, |
|
{ |
|
"epoch": 0.6570726873195744, |
|
"grad_norm": 2.7426700592041016, |
|
"learning_rate": 2.354529755004065e-05, |
|
"loss": 0.0629, |
|
"step": 45750 |
|
}, |
|
{ |
|
"epoch": 0.657790799546153, |
|
"grad_norm": 0.03408382460474968, |
|
"learning_rate": 2.3538081883385205e-05, |
|
"loss": 0.0448, |
|
"step": 45800 |
|
}, |
|
{ |
|
"epoch": 0.6585089117727319, |
|
"grad_norm": 0.042699478566646576, |
|
"learning_rate": 2.3530866216729764e-05, |
|
"loss": 0.0256, |
|
"step": 45850 |
|
}, |
|
{ |
|
"epoch": 0.6592270239993107, |
|
"grad_norm": 0.07323332875967026, |
|
"learning_rate": 2.3523650550074322e-05, |
|
"loss": 0.0574, |
|
"step": 45900 |
|
}, |
|
{ |
|
"epoch": 0.6599451362258894, |
|
"grad_norm": 0.08284715563058853, |
|
"learning_rate": 2.3516434883418878e-05, |
|
"loss": 0.0516, |
|
"step": 45950 |
|
}, |
|
{ |
|
"epoch": 0.6606632484524682, |
|
"grad_norm": 0.05623028054833412, |
|
"learning_rate": 2.3509219216763436e-05, |
|
"loss": 0.0256, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.661381360679047, |
|
"grad_norm": 4.314274311065674, |
|
"learning_rate": 2.3502003550107995e-05, |
|
"loss": 0.0436, |
|
"step": 46050 |
|
}, |
|
{ |
|
"epoch": 0.6620994729056257, |
|
"grad_norm": 33.84172058105469, |
|
"learning_rate": 2.3494787883452554e-05, |
|
"loss": 0.0542, |
|
"step": 46100 |
|
}, |
|
{ |
|
"epoch": 0.6628175851322045, |
|
"grad_norm": 0.04924549162387848, |
|
"learning_rate": 2.348757221679711e-05, |
|
"loss": 0.0574, |
|
"step": 46150 |
|
}, |
|
{ |
|
"epoch": 0.6635356973587833, |
|
"grad_norm": 0.04736483097076416, |
|
"learning_rate": 2.3480356550141668e-05, |
|
"loss": 0.0614, |
|
"step": 46200 |
|
}, |
|
{ |
|
"epoch": 0.664253809585362, |
|
"grad_norm": 0.06680673360824585, |
|
"learning_rate": 2.3473140883486227e-05, |
|
"loss": 0.0511, |
|
"step": 46250 |
|
}, |
|
{ |
|
"epoch": 0.6649719218119408, |
|
"grad_norm": 0.13749228417873383, |
|
"learning_rate": 2.3465925216830785e-05, |
|
"loss": 0.0642, |
|
"step": 46300 |
|
}, |
|
{ |
|
"epoch": 0.6656900340385196, |
|
"grad_norm": 1.6317678689956665, |
|
"learning_rate": 2.345885386350845e-05, |
|
"loss": 0.0787, |
|
"step": 46350 |
|
}, |
|
{ |
|
"epoch": 0.6664081462650983, |
|
"grad_norm": 0.05337180197238922, |
|
"learning_rate": 2.3451638196853006e-05, |
|
"loss": 0.0747, |
|
"step": 46400 |
|
}, |
|
{ |
|
"epoch": 0.6671262584916771, |
|
"grad_norm": 6.942692279815674, |
|
"learning_rate": 2.3444422530197565e-05, |
|
"loss": 0.0694, |
|
"step": 46450 |
|
}, |
|
{ |
|
"epoch": 0.6678443707182559, |
|
"grad_norm": 0.13905298709869385, |
|
"learning_rate": 2.3437206863542124e-05, |
|
"loss": 0.0425, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 0.6685624829448347, |
|
"grad_norm": 18.240694046020508, |
|
"learning_rate": 2.342999119688668e-05, |
|
"loss": 0.0532, |
|
"step": 46550 |
|
}, |
|
{ |
|
"epoch": 0.6692805951714134, |
|
"grad_norm": 0.1489611268043518, |
|
"learning_rate": 2.342277553023124e-05, |
|
"loss": 0.0698, |
|
"step": 46600 |
|
}, |
|
{ |
|
"epoch": 0.6699987073979922, |
|
"grad_norm": 0.06899626553058624, |
|
"learning_rate": 2.3415559863575797e-05, |
|
"loss": 0.0595, |
|
"step": 46650 |
|
}, |
|
{ |
|
"epoch": 0.670716819624571, |
|
"grad_norm": 0.0636877715587616, |
|
"learning_rate": 2.3408344196920352e-05, |
|
"loss": 0.049, |
|
"step": 46700 |
|
}, |
|
{ |
|
"epoch": 0.6714349318511497, |
|
"grad_norm": 8.30612564086914, |
|
"learning_rate": 2.3401128530264914e-05, |
|
"loss": 0.0551, |
|
"step": 46750 |
|
}, |
|
{ |
|
"epoch": 0.6721530440777285, |
|
"grad_norm": 2.128549575805664, |
|
"learning_rate": 2.339391286360947e-05, |
|
"loss": 0.0701, |
|
"step": 46800 |
|
}, |
|
{ |
|
"epoch": 0.6728711563043073, |
|
"grad_norm": 0.06187480315566063, |
|
"learning_rate": 2.3386697196954025e-05, |
|
"loss": 0.0827, |
|
"step": 46850 |
|
}, |
|
{ |
|
"epoch": 0.673589268530886, |
|
"grad_norm": 0.03272176533937454, |
|
"learning_rate": 2.3379481530298587e-05, |
|
"loss": 0.0612, |
|
"step": 46900 |
|
}, |
|
{ |
|
"epoch": 0.6743073807574648, |
|
"grad_norm": 0.9007086753845215, |
|
"learning_rate": 2.3372265863643142e-05, |
|
"loss": 0.0303, |
|
"step": 46950 |
|
}, |
|
{ |
|
"epoch": 0.6750254929840436, |
|
"grad_norm": 0.48982569575309753, |
|
"learning_rate": 2.33650501969877e-05, |
|
"loss": 0.0446, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.6757436052106223, |
|
"grad_norm": 0.0573323518037796, |
|
"learning_rate": 2.335783453033226e-05, |
|
"loss": 0.0466, |
|
"step": 47050 |
|
}, |
|
{ |
|
"epoch": 0.6764617174372011, |
|
"grad_norm": 0.035964153707027435, |
|
"learning_rate": 2.3350618863676815e-05, |
|
"loss": 0.0385, |
|
"step": 47100 |
|
}, |
|
{ |
|
"epoch": 0.6771798296637799, |
|
"grad_norm": 0.10130264610052109, |
|
"learning_rate": 2.3343403197021374e-05, |
|
"loss": 0.0615, |
|
"step": 47150 |
|
}, |
|
{ |
|
"epoch": 0.6778979418903587, |
|
"grad_norm": 0.08190751075744629, |
|
"learning_rate": 2.3336187530365932e-05, |
|
"loss": 0.0342, |
|
"step": 47200 |
|
}, |
|
{ |
|
"epoch": 0.6786160541169374, |
|
"grad_norm": 0.061607372015714645, |
|
"learning_rate": 2.3328971863710488e-05, |
|
"loss": 0.0418, |
|
"step": 47250 |
|
}, |
|
{ |
|
"epoch": 0.6793341663435162, |
|
"grad_norm": 0.16257597506046295, |
|
"learning_rate": 2.3321756197055046e-05, |
|
"loss": 0.0375, |
|
"step": 47300 |
|
}, |
|
{ |
|
"epoch": 0.680052278570095, |
|
"grad_norm": 3.986081123352051, |
|
"learning_rate": 2.3314540530399605e-05, |
|
"loss": 0.0343, |
|
"step": 47350 |
|
}, |
|
{ |
|
"epoch": 0.6807703907966737, |
|
"grad_norm": 0.6988328099250793, |
|
"learning_rate": 2.330732486374416e-05, |
|
"loss": 0.0137, |
|
"step": 47400 |
|
}, |
|
{ |
|
"epoch": 0.6814885030232525, |
|
"grad_norm": 1.6210227012634277, |
|
"learning_rate": 2.330010919708872e-05, |
|
"loss": 0.0755, |
|
"step": 47450 |
|
}, |
|
{ |
|
"epoch": 0.6822066152498313, |
|
"grad_norm": 0.025823798030614853, |
|
"learning_rate": 2.3292893530433278e-05, |
|
"loss": 0.0405, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 0.68292472747641, |
|
"grad_norm": 9.932079315185547, |
|
"learning_rate": 2.3285677863777833e-05, |
|
"loss": 0.0547, |
|
"step": 47550 |
|
}, |
|
{ |
|
"epoch": 0.6836428397029888, |
|
"grad_norm": 0.060679610818624496, |
|
"learning_rate": 2.3278462197122392e-05, |
|
"loss": 0.0827, |
|
"step": 47600 |
|
}, |
|
{ |
|
"epoch": 0.6843609519295676, |
|
"grad_norm": 0.05937912315130234, |
|
"learning_rate": 2.327124653046695e-05, |
|
"loss": 0.0997, |
|
"step": 47650 |
|
}, |
|
{ |
|
"epoch": 0.6850790641561463, |
|
"grad_norm": 0.19867148995399475, |
|
"learning_rate": 2.326403086381151e-05, |
|
"loss": 0.0405, |
|
"step": 47700 |
|
}, |
|
{ |
|
"epoch": 0.6857971763827251, |
|
"grad_norm": 0.05542898178100586, |
|
"learning_rate": 2.3256815197156065e-05, |
|
"loss": 0.0463, |
|
"step": 47750 |
|
}, |
|
{ |
|
"epoch": 0.6865152886093039, |
|
"grad_norm": 0.027509741485118866, |
|
"learning_rate": 2.3249599530500623e-05, |
|
"loss": 0.0291, |
|
"step": 47800 |
|
}, |
|
{ |
|
"epoch": 0.6872334008358826, |
|
"grad_norm": 2.161909818649292, |
|
"learning_rate": 2.3242383863845182e-05, |
|
"loss": 0.0709, |
|
"step": 47850 |
|
}, |
|
{ |
|
"epoch": 0.6879515130624614, |
|
"grad_norm": 0.05292743816971779, |
|
"learning_rate": 2.3235168197189738e-05, |
|
"loss": 0.0551, |
|
"step": 47900 |
|
}, |
|
{ |
|
"epoch": 0.6886696252890402, |
|
"grad_norm": 0.043459270149469376, |
|
"learning_rate": 2.3227952530534296e-05, |
|
"loss": 0.0444, |
|
"step": 47950 |
|
}, |
|
{ |
|
"epoch": 0.689387737515619, |
|
"grad_norm": 0.08632846176624298, |
|
"learning_rate": 2.3220736863878855e-05, |
|
"loss": 0.0678, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.6901058497421977, |
|
"grad_norm": 0.12333878874778748, |
|
"learning_rate": 2.321352119722341e-05, |
|
"loss": 0.0509, |
|
"step": 48050 |
|
}, |
|
{ |
|
"epoch": 0.6908239619687765, |
|
"grad_norm": 0.0415424220263958, |
|
"learning_rate": 2.320630553056797e-05, |
|
"loss": 0.063, |
|
"step": 48100 |
|
}, |
|
{ |
|
"epoch": 0.6915420741953553, |
|
"grad_norm": 0.7142232656478882, |
|
"learning_rate": 2.3199089863912528e-05, |
|
"loss": 0.0596, |
|
"step": 48150 |
|
}, |
|
{ |
|
"epoch": 0.692260186421934, |
|
"grad_norm": 0.036065757274627686, |
|
"learning_rate": 2.3191874197257083e-05, |
|
"loss": 0.0404, |
|
"step": 48200 |
|
}, |
|
{ |
|
"epoch": 0.6929782986485128, |
|
"grad_norm": 0.02197784185409546, |
|
"learning_rate": 2.3184658530601645e-05, |
|
"loss": 0.025, |
|
"step": 48250 |
|
}, |
|
{ |
|
"epoch": 0.6936964108750916, |
|
"grad_norm": 12.290438652038574, |
|
"learning_rate": 2.31774428639462e-05, |
|
"loss": 0.0583, |
|
"step": 48300 |
|
}, |
|
{ |
|
"epoch": 0.6944145231016703, |
|
"grad_norm": 0.08092088252305984, |
|
"learning_rate": 2.3170227197290756e-05, |
|
"loss": 0.0656, |
|
"step": 48350 |
|
}, |
|
{ |
|
"epoch": 0.6951326353282491, |
|
"grad_norm": 0.07545096427202225, |
|
"learning_rate": 2.3163011530635318e-05, |
|
"loss": 0.0505, |
|
"step": 48400 |
|
}, |
|
{ |
|
"epoch": 0.6958507475548279, |
|
"grad_norm": 1.5212199687957764, |
|
"learning_rate": 2.3155795863979873e-05, |
|
"loss": 0.0378, |
|
"step": 48450 |
|
}, |
|
{ |
|
"epoch": 0.6965688597814066, |
|
"grad_norm": 0.05450819805264473, |
|
"learning_rate": 2.3148580197324432e-05, |
|
"loss": 0.0255, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 0.6972869720079854, |
|
"grad_norm": 0.30951157212257385, |
|
"learning_rate": 2.314136453066899e-05, |
|
"loss": 0.0911, |
|
"step": 48550 |
|
}, |
|
{ |
|
"epoch": 0.6980050842345642, |
|
"grad_norm": 0.032920509576797485, |
|
"learning_rate": 2.3134148864013546e-05, |
|
"loss": 0.0368, |
|
"step": 48600 |
|
}, |
|
{ |
|
"epoch": 0.6987231964611429, |
|
"grad_norm": 0.09513316303491592, |
|
"learning_rate": 2.3126933197358105e-05, |
|
"loss": 0.0462, |
|
"step": 48650 |
|
}, |
|
{ |
|
"epoch": 0.6994413086877217, |
|
"grad_norm": 0.05050795152783394, |
|
"learning_rate": 2.3119717530702664e-05, |
|
"loss": 0.0413, |
|
"step": 48700 |
|
}, |
|
{ |
|
"epoch": 0.7001594209143005, |
|
"grad_norm": 4.3214430809021, |
|
"learning_rate": 2.311250186404722e-05, |
|
"loss": 0.0649, |
|
"step": 48750 |
|
}, |
|
{ |
|
"epoch": 0.7008775331408793, |
|
"grad_norm": 0.04189575836062431, |
|
"learning_rate": 2.3105286197391778e-05, |
|
"loss": 0.0509, |
|
"step": 48800 |
|
}, |
|
{ |
|
"epoch": 0.701595645367458, |
|
"grad_norm": 57.516822814941406, |
|
"learning_rate": 2.3098070530736336e-05, |
|
"loss": 0.0302, |
|
"step": 48850 |
|
}, |
|
{ |
|
"epoch": 0.7023137575940368, |
|
"grad_norm": 0.05342291295528412, |
|
"learning_rate": 2.309085486408089e-05, |
|
"loss": 0.0981, |
|
"step": 48900 |
|
}, |
|
{ |
|
"epoch": 0.7030318698206156, |
|
"grad_norm": 0.06606775522232056, |
|
"learning_rate": 2.3083639197425454e-05, |
|
"loss": 0.0375, |
|
"step": 48950 |
|
}, |
|
{ |
|
"epoch": 0.7037499820471943, |
|
"grad_norm": 4.33423376083374, |
|
"learning_rate": 2.307642353077001e-05, |
|
"loss": 0.0372, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.7044680942737731, |
|
"grad_norm": 0.1970718950033188, |
|
"learning_rate": 2.3069207864114564e-05, |
|
"loss": 0.0447, |
|
"step": 49050 |
|
}, |
|
{ |
|
"epoch": 0.7051862065003519, |
|
"grad_norm": 0.03607802465558052, |
|
"learning_rate": 2.3061992197459127e-05, |
|
"loss": 0.0762, |
|
"step": 49100 |
|
}, |
|
{ |
|
"epoch": 0.7059043187269306, |
|
"grad_norm": 0.03366904705762863, |
|
"learning_rate": 2.3054776530803682e-05, |
|
"loss": 0.0685, |
|
"step": 49150 |
|
}, |
|
{ |
|
"epoch": 0.7066224309535094, |
|
"grad_norm": 7.310495376586914, |
|
"learning_rate": 2.3047560864148237e-05, |
|
"loss": 0.0862, |
|
"step": 49200 |
|
}, |
|
{ |
|
"epoch": 0.7073405431800882, |
|
"grad_norm": 0.07653423398733139, |
|
"learning_rate": 2.30403451974928e-05, |
|
"loss": 0.0479, |
|
"step": 49250 |
|
}, |
|
{ |
|
"epoch": 0.7080586554066669, |
|
"grad_norm": 0.03945968300104141, |
|
"learning_rate": 2.3033129530837355e-05, |
|
"loss": 0.0297, |
|
"step": 49300 |
|
}, |
|
{ |
|
"epoch": 0.7087767676332457, |
|
"grad_norm": 0.015093422494828701, |
|
"learning_rate": 2.302591386418191e-05, |
|
"loss": 0.0314, |
|
"step": 49350 |
|
}, |
|
{ |
|
"epoch": 0.7094948798598245, |
|
"grad_norm": 0.025879524648189545, |
|
"learning_rate": 2.3018698197526472e-05, |
|
"loss": 0.0603, |
|
"step": 49400 |
|
}, |
|
{ |
|
"epoch": 0.7102129920864033, |
|
"grad_norm": 0.01879737712442875, |
|
"learning_rate": 2.3011482530871027e-05, |
|
"loss": 0.0545, |
|
"step": 49450 |
|
}, |
|
{ |
|
"epoch": 0.710931104312982, |
|
"grad_norm": 0.027388766407966614, |
|
"learning_rate": 2.3004266864215583e-05, |
|
"loss": 0.0252, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 0.7116492165395608, |
|
"grad_norm": 0.03858887404203415, |
|
"learning_rate": 2.2997051197560145e-05, |
|
"loss": 0.042, |
|
"step": 49550 |
|
}, |
|
{ |
|
"epoch": 0.7123673287661396, |
|
"grad_norm": 0.21305541694164276, |
|
"learning_rate": 2.29898355309047e-05, |
|
"loss": 0.0649, |
|
"step": 49600 |
|
}, |
|
{ |
|
"epoch": 0.7130854409927183, |
|
"grad_norm": 0.13431255519390106, |
|
"learning_rate": 2.298261986424926e-05, |
|
"loss": 0.0599, |
|
"step": 49650 |
|
}, |
|
{ |
|
"epoch": 0.7138035532192971, |
|
"grad_norm": 4.919084072113037, |
|
"learning_rate": 2.2975404197593818e-05, |
|
"loss": 0.0439, |
|
"step": 49700 |
|
}, |
|
{ |
|
"epoch": 0.7145216654458759, |
|
"grad_norm": 0.0446869432926178, |
|
"learning_rate": 2.2968188530938373e-05, |
|
"loss": 0.027, |
|
"step": 49750 |
|
}, |
|
{ |
|
"epoch": 0.7152397776724546, |
|
"grad_norm": 0.018511831760406494, |
|
"learning_rate": 2.2960972864282932e-05, |
|
"loss": 0.0274, |
|
"step": 49800 |
|
}, |
|
{ |
|
"epoch": 0.7159578898990334, |
|
"grad_norm": 1.278128743171692, |
|
"learning_rate": 2.295375719762749e-05, |
|
"loss": 0.0496, |
|
"step": 49850 |
|
}, |
|
{ |
|
"epoch": 0.7166760021256122, |
|
"grad_norm": 0.04163791239261627, |
|
"learning_rate": 2.2946541530972046e-05, |
|
"loss": 0.0374, |
|
"step": 49900 |
|
}, |
|
{ |
|
"epoch": 0.7173941143521909, |
|
"grad_norm": 2.655949115753174, |
|
"learning_rate": 2.2939325864316605e-05, |
|
"loss": 0.0497, |
|
"step": 49950 |
|
}, |
|
{ |
|
"epoch": 0.7181122265787697, |
|
"grad_norm": 2.2864792346954346, |
|
"learning_rate": 2.2932110197661163e-05, |
|
"loss": 0.0375, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.7188303388053485, |
|
"grad_norm": 1.0832637548446655, |
|
"learning_rate": 2.292489453100572e-05, |
|
"loss": 0.033, |
|
"step": 50050 |
|
}, |
|
{ |
|
"epoch": 0.7195484510319272, |
|
"grad_norm": 0.036344680935144424, |
|
"learning_rate": 2.2917678864350277e-05, |
|
"loss": 0.0441, |
|
"step": 50100 |
|
}, |
|
{ |
|
"epoch": 0.720266563258506, |
|
"grad_norm": 0.07024290412664413, |
|
"learning_rate": 2.2910463197694836e-05, |
|
"loss": 0.0464, |
|
"step": 50150 |
|
}, |
|
{ |
|
"epoch": 0.7209846754850848, |
|
"grad_norm": 0.04781986400485039, |
|
"learning_rate": 2.290324753103939e-05, |
|
"loss": 0.0235, |
|
"step": 50200 |
|
}, |
|
{ |
|
"epoch": 0.7217027877116636, |
|
"grad_norm": 7.497429847717285, |
|
"learning_rate": 2.289603186438395e-05, |
|
"loss": 0.0568, |
|
"step": 50250 |
|
}, |
|
{ |
|
"epoch": 0.7224208999382423, |
|
"grad_norm": 0.03674536198377609, |
|
"learning_rate": 2.288881619772851e-05, |
|
"loss": 0.0229, |
|
"step": 50300 |
|
}, |
|
{ |
|
"epoch": 0.7231390121648211, |
|
"grad_norm": 27.460649490356445, |
|
"learning_rate": 2.2881600531073068e-05, |
|
"loss": 0.0394, |
|
"step": 50350 |
|
}, |
|
{ |
|
"epoch": 0.7238571243913999, |
|
"grad_norm": 0.5803928375244141, |
|
"learning_rate": 2.2874384864417623e-05, |
|
"loss": 0.0319, |
|
"step": 50400 |
|
}, |
|
{ |
|
"epoch": 0.7245752366179786, |
|
"grad_norm": 0.22803975641727448, |
|
"learning_rate": 2.286716919776218e-05, |
|
"loss": 0.0652, |
|
"step": 50450 |
|
}, |
|
{ |
|
"epoch": 0.7252933488445574, |
|
"grad_norm": 0.061174940317869186, |
|
"learning_rate": 2.285995353110674e-05, |
|
"loss": 0.0387, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 0.7260114610711362, |
|
"grad_norm": 9.43375015258789, |
|
"learning_rate": 2.28527378644513e-05, |
|
"loss": 0.0538, |
|
"step": 50550 |
|
}, |
|
{ |
|
"epoch": 0.7267295732977149, |
|
"grad_norm": 0.11616658419370651, |
|
"learning_rate": 2.2845522197795854e-05, |
|
"loss": 0.0566, |
|
"step": 50600 |
|
}, |
|
{ |
|
"epoch": 0.7274476855242937, |
|
"grad_norm": 0.30462756752967834, |
|
"learning_rate": 2.2838306531140413e-05, |
|
"loss": 0.0836, |
|
"step": 50650 |
|
}, |
|
{ |
|
"epoch": 0.7281657977508725, |
|
"grad_norm": 2.478368043899536, |
|
"learning_rate": 2.2831090864484972e-05, |
|
"loss": 0.0556, |
|
"step": 50700 |
|
}, |
|
{ |
|
"epoch": 0.7288839099774512, |
|
"grad_norm": 1.8832333087921143, |
|
"learning_rate": 2.2823875197829527e-05, |
|
"loss": 0.0547, |
|
"step": 50750 |
|
}, |
|
{ |
|
"epoch": 0.72960202220403, |
|
"grad_norm": 0.07902120053768158, |
|
"learning_rate": 2.2816659531174086e-05, |
|
"loss": 0.0332, |
|
"step": 50800 |
|
}, |
|
{ |
|
"epoch": 0.7303201344306088, |
|
"grad_norm": 3.213438034057617, |
|
"learning_rate": 2.2809443864518645e-05, |
|
"loss": 0.0287, |
|
"step": 50850 |
|
}, |
|
{ |
|
"epoch": 0.7310382466571876, |
|
"grad_norm": 1.2780394554138184, |
|
"learning_rate": 2.28022281978632e-05, |
|
"loss": 0.0519, |
|
"step": 50900 |
|
}, |
|
{ |
|
"epoch": 0.7317563588837663, |
|
"grad_norm": 0.04258378595113754, |
|
"learning_rate": 2.279501253120776e-05, |
|
"loss": 0.0229, |
|
"step": 50950 |
|
}, |
|
{ |
|
"epoch": 0.7324744711103451, |
|
"grad_norm": 0.04968509078025818, |
|
"learning_rate": 2.2787796864552317e-05, |
|
"loss": 0.0492, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 0.7331925833369239, |
|
"grad_norm": 15.08222770690918, |
|
"learning_rate": 2.2780581197896876e-05, |
|
"loss": 0.0727, |
|
"step": 51050 |
|
}, |
|
{ |
|
"epoch": 0.7339106955635026, |
|
"grad_norm": 0.038185350596904755, |
|
"learning_rate": 2.277336553124143e-05, |
|
"loss": 0.0455, |
|
"step": 51100 |
|
}, |
|
{ |
|
"epoch": 0.7346288077900814, |
|
"grad_norm": 37.51081466674805, |
|
"learning_rate": 2.276614986458599e-05, |
|
"loss": 0.0562, |
|
"step": 51150 |
|
}, |
|
{ |
|
"epoch": 0.7353469200166602, |
|
"grad_norm": 1.8346716165542603, |
|
"learning_rate": 2.275893419793055e-05, |
|
"loss": 0.0362, |
|
"step": 51200 |
|
}, |
|
{ |
|
"epoch": 0.7360650322432389, |
|
"grad_norm": 0.023607393726706505, |
|
"learning_rate": 2.2751718531275104e-05, |
|
"loss": 0.0701, |
|
"step": 51250 |
|
}, |
|
{ |
|
"epoch": 0.7367831444698177, |
|
"grad_norm": 0.03187699615955353, |
|
"learning_rate": 2.2744502864619663e-05, |
|
"loss": 0.0289, |
|
"step": 51300 |
|
}, |
|
{ |
|
"epoch": 0.7375012566963965, |
|
"grad_norm": 0.48830530047416687, |
|
"learning_rate": 2.273728719796422e-05, |
|
"loss": 0.0756, |
|
"step": 51350 |
|
}, |
|
{ |
|
"epoch": 0.7382193689229752, |
|
"grad_norm": 0.08487236499786377, |
|
"learning_rate": 2.2730071531308777e-05, |
|
"loss": 0.0709, |
|
"step": 51400 |
|
}, |
|
{ |
|
"epoch": 0.738937481149554, |
|
"grad_norm": 0.061929915100336075, |
|
"learning_rate": 2.2722855864653336e-05, |
|
"loss": 0.0424, |
|
"step": 51450 |
|
}, |
|
{ |
|
"epoch": 0.7396555933761328, |
|
"grad_norm": 0.02025146409869194, |
|
"learning_rate": 2.2715640197997894e-05, |
|
"loss": 0.0261, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 0.7403737056027115, |
|
"grad_norm": 0.04376785829663277, |
|
"learning_rate": 2.270842453134245e-05, |
|
"loss": 0.062, |
|
"step": 51550 |
|
}, |
|
{ |
|
"epoch": 0.7410918178292903, |
|
"grad_norm": 0.05691489577293396, |
|
"learning_rate": 2.270120886468701e-05, |
|
"loss": 0.0492, |
|
"step": 51600 |
|
}, |
|
{ |
|
"epoch": 0.7418099300558691, |
|
"grad_norm": 0.04114688187837601, |
|
"learning_rate": 2.2693993198031567e-05, |
|
"loss": 0.0477, |
|
"step": 51650 |
|
}, |
|
{ |
|
"epoch": 0.742528042282448, |
|
"grad_norm": 0.04338749125599861, |
|
"learning_rate": 2.2686777531376123e-05, |
|
"loss": 0.0524, |
|
"step": 51700 |
|
}, |
|
{ |
|
"epoch": 0.7432461545090266, |
|
"grad_norm": 0.031342532485723495, |
|
"learning_rate": 2.2679561864720685e-05, |
|
"loss": 0.0278, |
|
"step": 51750 |
|
}, |
|
{ |
|
"epoch": 0.7439642667356055, |
|
"grad_norm": 24.92247200012207, |
|
"learning_rate": 2.267234619806524e-05, |
|
"loss": 0.0359, |
|
"step": 51800 |
|
}, |
|
{ |
|
"epoch": 0.7446823789621843, |
|
"grad_norm": 0.12787804007530212, |
|
"learning_rate": 2.2665130531409795e-05, |
|
"loss": 0.0413, |
|
"step": 51850 |
|
}, |
|
{ |
|
"epoch": 0.745400491188763, |
|
"grad_norm": 0.013693703338503838, |
|
"learning_rate": 2.2657914864754357e-05, |
|
"loss": 0.0279, |
|
"step": 51900 |
|
}, |
|
{ |
|
"epoch": 0.7461186034153418, |
|
"grad_norm": 408.0387878417969, |
|
"learning_rate": 2.2650699198098913e-05, |
|
"loss": 0.0313, |
|
"step": 51950 |
|
}, |
|
{ |
|
"epoch": 0.7468367156419206, |
|
"grad_norm": 0.014424344524741173, |
|
"learning_rate": 2.2643483531443468e-05, |
|
"loss": 0.034, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 0.7475548278684992, |
|
"grad_norm": 0.08368874341249466, |
|
"learning_rate": 2.263626786478803e-05, |
|
"loss": 0.0298, |
|
"step": 52050 |
|
}, |
|
{ |
|
"epoch": 0.748272940095078, |
|
"grad_norm": 0.20141181349754333, |
|
"learning_rate": 2.2629052198132586e-05, |
|
"loss": 0.0459, |
|
"step": 52100 |
|
}, |
|
{ |
|
"epoch": 0.7489910523216569, |
|
"grad_norm": 0.14126254618167877, |
|
"learning_rate": 2.262183653147714e-05, |
|
"loss": 0.0477, |
|
"step": 52150 |
|
}, |
|
{ |
|
"epoch": 0.7497091645482356, |
|
"grad_norm": 0.013539042323827744, |
|
"learning_rate": 2.2614620864821703e-05, |
|
"loss": 0.0348, |
|
"step": 52200 |
|
}, |
|
{ |
|
"epoch": 0.7504272767748144, |
|
"grad_norm": 0.15344727039337158, |
|
"learning_rate": 2.260740519816626e-05, |
|
"loss": 0.0672, |
|
"step": 52250 |
|
}, |
|
{ |
|
"epoch": 0.7511453890013932, |
|
"grad_norm": 0.028053514659404755, |
|
"learning_rate": 2.2600189531510814e-05, |
|
"loss": 0.0288, |
|
"step": 52300 |
|
}, |
|
{ |
|
"epoch": 0.7518635012279719, |
|
"grad_norm": 0.030573882162570953, |
|
"learning_rate": 2.2592973864855376e-05, |
|
"loss": 0.0338, |
|
"step": 52350 |
|
}, |
|
{ |
|
"epoch": 0.7525816134545507, |
|
"grad_norm": 0.03588308021426201, |
|
"learning_rate": 2.258575819819993e-05, |
|
"loss": 0.0362, |
|
"step": 52400 |
|
}, |
|
{ |
|
"epoch": 0.7532997256811295, |
|
"grad_norm": 0.031105153262615204, |
|
"learning_rate": 2.2578542531544493e-05, |
|
"loss": 0.0311, |
|
"step": 52450 |
|
}, |
|
{ |
|
"epoch": 0.7540178379077083, |
|
"grad_norm": 4.209131717681885, |
|
"learning_rate": 2.257132686488905e-05, |
|
"loss": 0.014, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 0.754735950134287, |
|
"grad_norm": 0.23591452836990356, |
|
"learning_rate": 2.2564111198233604e-05, |
|
"loss": 0.0755, |
|
"step": 52550 |
|
}, |
|
{ |
|
"epoch": 0.7554540623608658, |
|
"grad_norm": 2.0754292011260986, |
|
"learning_rate": 2.2556895531578166e-05, |
|
"loss": 0.026, |
|
"step": 52600 |
|
}, |
|
{ |
|
"epoch": 0.7561721745874446, |
|
"grad_norm": 0.00931734312325716, |
|
"learning_rate": 2.254967986492272e-05, |
|
"loss": 0.0423, |
|
"step": 52650 |
|
}, |
|
{ |
|
"epoch": 0.7568902868140233, |
|
"grad_norm": 0.04183708503842354, |
|
"learning_rate": 2.2542464198267277e-05, |
|
"loss": 0.0501, |
|
"step": 52700 |
|
}, |
|
{ |
|
"epoch": 0.7576083990406021, |
|
"grad_norm": 1.4638354778289795, |
|
"learning_rate": 2.253524853161184e-05, |
|
"loss": 0.0654, |
|
"step": 52750 |
|
}, |
|
{ |
|
"epoch": 0.7583265112671809, |
|
"grad_norm": 0.03767836093902588, |
|
"learning_rate": 2.2528032864956394e-05, |
|
"loss": 0.0318, |
|
"step": 52800 |
|
}, |
|
{ |
|
"epoch": 0.7590446234937596, |
|
"grad_norm": 0.021045122295618057, |
|
"learning_rate": 2.252081719830095e-05, |
|
"loss": 0.0148, |
|
"step": 52850 |
|
}, |
|
{ |
|
"epoch": 0.7597627357203384, |
|
"grad_norm": 0.024530332535505295, |
|
"learning_rate": 2.251360153164551e-05, |
|
"loss": 0.0532, |
|
"step": 52900 |
|
}, |
|
{ |
|
"epoch": 0.7604808479469172, |
|
"grad_norm": 0.029710279777646065, |
|
"learning_rate": 2.2506385864990067e-05, |
|
"loss": 0.0595, |
|
"step": 52950 |
|
}, |
|
{ |
|
"epoch": 0.7611989601734959, |
|
"grad_norm": 0.04434856027364731, |
|
"learning_rate": 2.2499170198334622e-05, |
|
"loss": 0.036, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 0.7619170724000747, |
|
"grad_norm": 18.918506622314453, |
|
"learning_rate": 2.2491954531679184e-05, |
|
"loss": 0.0553, |
|
"step": 53050 |
|
}, |
|
{ |
|
"epoch": 0.7626351846266535, |
|
"grad_norm": 0.049203574657440186, |
|
"learning_rate": 2.248473886502374e-05, |
|
"loss": 0.0348, |
|
"step": 53100 |
|
}, |
|
{ |
|
"epoch": 0.7633532968532323, |
|
"grad_norm": 1.5684940814971924, |
|
"learning_rate": 2.24775231983683e-05, |
|
"loss": 0.0347, |
|
"step": 53150 |
|
}, |
|
{ |
|
"epoch": 0.764071409079811, |
|
"grad_norm": 0.02496212162077427, |
|
"learning_rate": 2.2470307531712857e-05, |
|
"loss": 0.0626, |
|
"step": 53200 |
|
}, |
|
{ |
|
"epoch": 0.7647895213063898, |
|
"grad_norm": 0.024081533774733543, |
|
"learning_rate": 2.2463091865057412e-05, |
|
"loss": 0.0538, |
|
"step": 53250 |
|
}, |
|
{ |
|
"epoch": 0.7655076335329686, |
|
"grad_norm": 0.460574746131897, |
|
"learning_rate": 2.245587619840197e-05, |
|
"loss": 0.0426, |
|
"step": 53300 |
|
}, |
|
{ |
|
"epoch": 0.7662257457595473, |
|
"grad_norm": 0.01899988390505314, |
|
"learning_rate": 2.244866053174653e-05, |
|
"loss": 0.0283, |
|
"step": 53350 |
|
}, |
|
{ |
|
"epoch": 0.7669438579861261, |
|
"grad_norm": 2.701721668243408, |
|
"learning_rate": 2.2441444865091085e-05, |
|
"loss": 0.0628, |
|
"step": 53400 |
|
}, |
|
{ |
|
"epoch": 0.7676619702127049, |
|
"grad_norm": 0.5999152660369873, |
|
"learning_rate": 2.2434229198435644e-05, |
|
"loss": 0.019, |
|
"step": 53450 |
|
}, |
|
{ |
|
"epoch": 0.7683800824392836, |
|
"grad_norm": 0.2346739023923874, |
|
"learning_rate": 2.2427013531780203e-05, |
|
"loss": 0.035, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 0.7690981946658624, |
|
"grad_norm": 0.01378537155687809, |
|
"learning_rate": 2.2419797865124758e-05, |
|
"loss": 0.0494, |
|
"step": 53550 |
|
}, |
|
{ |
|
"epoch": 0.7698163068924412, |
|
"grad_norm": 0.010730593465268612, |
|
"learning_rate": 2.2412582198469317e-05, |
|
"loss": 0.0394, |
|
"step": 53600 |
|
}, |
|
{ |
|
"epoch": 0.7705344191190199, |
|
"grad_norm": 85.68353271484375, |
|
"learning_rate": 2.2405366531813875e-05, |
|
"loss": 0.0542, |
|
"step": 53650 |
|
}, |
|
{ |
|
"epoch": 0.7712525313455987, |
|
"grad_norm": 0.024320660158991814, |
|
"learning_rate": 2.239815086515843e-05, |
|
"loss": 0.039, |
|
"step": 53700 |
|
}, |
|
{ |
|
"epoch": 0.7719706435721775, |
|
"grad_norm": 0.03382103890180588, |
|
"learning_rate": 2.239093519850299e-05, |
|
"loss": 0.0696, |
|
"step": 53750 |
|
}, |
|
{ |
|
"epoch": 0.7726887557987562, |
|
"grad_norm": 0.1424567699432373, |
|
"learning_rate": 2.2383719531847548e-05, |
|
"loss": 0.0431, |
|
"step": 53800 |
|
}, |
|
{ |
|
"epoch": 0.773406868025335, |
|
"grad_norm": 6.650714874267578, |
|
"learning_rate": 2.2376503865192107e-05, |
|
"loss": 0.0292, |
|
"step": 53850 |
|
}, |
|
{ |
|
"epoch": 0.7741249802519138, |
|
"grad_norm": 0.012601901777088642, |
|
"learning_rate": 2.2369288198536662e-05, |
|
"loss": 0.0089, |
|
"step": 53900 |
|
}, |
|
{ |
|
"epoch": 0.7748430924784926, |
|
"grad_norm": 0.017761344090104103, |
|
"learning_rate": 2.236207253188122e-05, |
|
"loss": 0.0547, |
|
"step": 53950 |
|
}, |
|
{ |
|
"epoch": 0.7755612047050713, |
|
"grad_norm": 0.021128958091139793, |
|
"learning_rate": 2.235485686522578e-05, |
|
"loss": 0.0539, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 0.7762793169316501, |
|
"grad_norm": 0.03616216406226158, |
|
"learning_rate": 2.2347641198570335e-05, |
|
"loss": 0.0574, |
|
"step": 54050 |
|
}, |
|
{ |
|
"epoch": 0.7769974291582289, |
|
"grad_norm": 0.0315951332449913, |
|
"learning_rate": 2.2340425531914894e-05, |
|
"loss": 0.0553, |
|
"step": 54100 |
|
}, |
|
{ |
|
"epoch": 0.7777155413848076, |
|
"grad_norm": 0.1365536004304886, |
|
"learning_rate": 2.2333209865259453e-05, |
|
"loss": 0.0376, |
|
"step": 54150 |
|
}, |
|
{ |
|
"epoch": 0.7784336536113864, |
|
"grad_norm": 0.064913310110569, |
|
"learning_rate": 2.2325994198604008e-05, |
|
"loss": 0.0362, |
|
"step": 54200 |
|
}, |
|
{ |
|
"epoch": 0.7791517658379652, |
|
"grad_norm": 0.2787727415561676, |
|
"learning_rate": 2.2318778531948567e-05, |
|
"loss": 0.0333, |
|
"step": 54250 |
|
}, |
|
{ |
|
"epoch": 0.7798698780645439, |
|
"grad_norm": 0.06095251441001892, |
|
"learning_rate": 2.2311562865293125e-05, |
|
"loss": 0.0442, |
|
"step": 54300 |
|
}, |
|
{ |
|
"epoch": 0.7805879902911227, |
|
"grad_norm": 0.06852833181619644, |
|
"learning_rate": 2.230434719863768e-05, |
|
"loss": 0.0483, |
|
"step": 54350 |
|
}, |
|
{ |
|
"epoch": 0.7813061025177015, |
|
"grad_norm": 5.220205307006836, |
|
"learning_rate": 2.229713153198224e-05, |
|
"loss": 0.0524, |
|
"step": 54400 |
|
}, |
|
{ |
|
"epoch": 0.7820242147442802, |
|
"grad_norm": 0.03756846487522125, |
|
"learning_rate": 2.2289915865326798e-05, |
|
"loss": 0.0208, |
|
"step": 54450 |
|
}, |
|
{ |
|
"epoch": 0.782742326970859, |
|
"grad_norm": 0.027425279840826988, |
|
"learning_rate": 2.2282700198671357e-05, |
|
"loss": 0.0503, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 0.7834604391974378, |
|
"grad_norm": 0.040080614387989044, |
|
"learning_rate": 2.2275484532015916e-05, |
|
"loss": 0.0201, |
|
"step": 54550 |
|
}, |
|
{ |
|
"epoch": 0.7841785514240165, |
|
"grad_norm": 46.412166595458984, |
|
"learning_rate": 2.226826886536047e-05, |
|
"loss": 0.0613, |
|
"step": 54600 |
|
}, |
|
{ |
|
"epoch": 0.7848966636505953, |
|
"grad_norm": 8.979586601257324, |
|
"learning_rate": 2.226105319870503e-05, |
|
"loss": 0.0199, |
|
"step": 54650 |
|
}, |
|
{ |
|
"epoch": 0.7856147758771741, |
|
"grad_norm": 0.05250140652060509, |
|
"learning_rate": 2.225383753204959e-05, |
|
"loss": 0.0367, |
|
"step": 54700 |
|
}, |
|
{ |
|
"epoch": 0.7863328881037529, |
|
"grad_norm": 1.2878022193908691, |
|
"learning_rate": 2.2246621865394144e-05, |
|
"loss": 0.049, |
|
"step": 54750 |
|
}, |
|
{ |
|
"epoch": 0.7870510003303316, |
|
"grad_norm": 1.8124881982803345, |
|
"learning_rate": 2.2239406198738702e-05, |
|
"loss": 0.0195, |
|
"step": 54800 |
|
}, |
|
{ |
|
"epoch": 0.7877691125569104, |
|
"grad_norm": 0.08563178777694702, |
|
"learning_rate": 2.223219053208326e-05, |
|
"loss": 0.0463, |
|
"step": 54850 |
|
}, |
|
{ |
|
"epoch": 0.7884872247834892, |
|
"grad_norm": 0.10941838473081589, |
|
"learning_rate": 2.2224974865427816e-05, |
|
"loss": 0.0641, |
|
"step": 54900 |
|
}, |
|
{ |
|
"epoch": 0.7892053370100679, |
|
"grad_norm": 0.09081951528787613, |
|
"learning_rate": 2.2217759198772375e-05, |
|
"loss": 0.0405, |
|
"step": 54950 |
|
}, |
|
{ |
|
"epoch": 0.7899234492366467, |
|
"grad_norm": 3.70243239402771, |
|
"learning_rate": 2.2210543532116934e-05, |
|
"loss": 0.0546, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.7906415614632255, |
|
"grad_norm": 0.4412289559841156, |
|
"learning_rate": 2.220332786546149e-05, |
|
"loss": 0.0465, |
|
"step": 55050 |
|
}, |
|
{ |
|
"epoch": 0.7913596736898042, |
|
"grad_norm": 3.0478625297546387, |
|
"learning_rate": 2.2196112198806048e-05, |
|
"loss": 0.0307, |
|
"step": 55100 |
|
}, |
|
{ |
|
"epoch": 0.792077785916383, |
|
"grad_norm": 26.179941177368164, |
|
"learning_rate": 2.2188896532150607e-05, |
|
"loss": 0.0603, |
|
"step": 55150 |
|
}, |
|
{ |
|
"epoch": 0.7927958981429618, |
|
"grad_norm": 6.677818298339844, |
|
"learning_rate": 2.2181680865495162e-05, |
|
"loss": 0.0224, |
|
"step": 55200 |
|
}, |
|
{ |
|
"epoch": 0.7935140103695405, |
|
"grad_norm": 0.05255184322595596, |
|
"learning_rate": 2.2174465198839724e-05, |
|
"loss": 0.0374, |
|
"step": 55250 |
|
}, |
|
{ |
|
"epoch": 0.7942321225961193, |
|
"grad_norm": 0.07613476365804672, |
|
"learning_rate": 2.216724953218428e-05, |
|
"loss": 0.0273, |
|
"step": 55300 |
|
}, |
|
{ |
|
"epoch": 0.7949502348226981, |
|
"grad_norm": 0.021911421790719032, |
|
"learning_rate": 2.2160033865528835e-05, |
|
"loss": 0.0326, |
|
"step": 55350 |
|
}, |
|
{ |
|
"epoch": 0.7956683470492769, |
|
"grad_norm": 2.5375399589538574, |
|
"learning_rate": 2.2152818198873397e-05, |
|
"loss": 0.0549, |
|
"step": 55400 |
|
}, |
|
{ |
|
"epoch": 0.7963864592758556, |
|
"grad_norm": 0.011430012993514538, |
|
"learning_rate": 2.2145602532217952e-05, |
|
"loss": 0.0511, |
|
"step": 55450 |
|
}, |
|
{ |
|
"epoch": 0.7971045715024344, |
|
"grad_norm": 0.04169834032654762, |
|
"learning_rate": 2.2138386865562508e-05, |
|
"loss": 0.0571, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 0.7978226837290132, |
|
"grad_norm": 0.03755054622888565, |
|
"learning_rate": 2.213117119890707e-05, |
|
"loss": 0.0327, |
|
"step": 55550 |
|
}, |
|
{ |
|
"epoch": 0.7985407959555919, |
|
"grad_norm": 17.928287506103516, |
|
"learning_rate": 2.2123955532251625e-05, |
|
"loss": 0.0356, |
|
"step": 55600 |
|
}, |
|
{ |
|
"epoch": 0.7992589081821707, |
|
"grad_norm": 0.16650213301181793, |
|
"learning_rate": 2.211673986559618e-05, |
|
"loss": 0.0801, |
|
"step": 55650 |
|
}, |
|
{ |
|
"epoch": 0.7999770204087495, |
|
"grad_norm": 0.09039030224084854, |
|
"learning_rate": 2.2109524198940742e-05, |
|
"loss": 0.0364, |
|
"step": 55700 |
|
}, |
|
{ |
|
"epoch": 0.8006951326353282, |
|
"grad_norm": 0.6012157797813416, |
|
"learning_rate": 2.2102308532285298e-05, |
|
"loss": 0.0696, |
|
"step": 55750 |
|
}, |
|
{ |
|
"epoch": 0.801413244861907, |
|
"grad_norm": 0.10176081955432892, |
|
"learning_rate": 2.2095092865629853e-05, |
|
"loss": 0.0163, |
|
"step": 55800 |
|
}, |
|
{ |
|
"epoch": 0.8021313570884858, |
|
"grad_norm": 0.047887321561574936, |
|
"learning_rate": 2.2087877198974415e-05, |
|
"loss": 0.0308, |
|
"step": 55850 |
|
}, |
|
{ |
|
"epoch": 0.8028494693150645, |
|
"grad_norm": 0.04983760043978691, |
|
"learning_rate": 2.208066153231897e-05, |
|
"loss": 0.0274, |
|
"step": 55900 |
|
}, |
|
{ |
|
"epoch": 0.8035675815416433, |
|
"grad_norm": 0.30719447135925293, |
|
"learning_rate": 2.207344586566353e-05, |
|
"loss": 0.0187, |
|
"step": 55950 |
|
}, |
|
{ |
|
"epoch": 0.8042856937682221, |
|
"grad_norm": 0.024881890043616295, |
|
"learning_rate": 2.2066230199008088e-05, |
|
"loss": 0.0512, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.8050038059948008, |
|
"grad_norm": 11.138710975646973, |
|
"learning_rate": 2.2059014532352643e-05, |
|
"loss": 0.0306, |
|
"step": 56050 |
|
}, |
|
{ |
|
"epoch": 0.8057219182213796, |
|
"grad_norm": 0.06446365267038345, |
|
"learning_rate": 2.2051798865697202e-05, |
|
"loss": 0.0406, |
|
"step": 56100 |
|
}, |
|
{ |
|
"epoch": 0.8064400304479584, |
|
"grad_norm": 0.054006390273571014, |
|
"learning_rate": 2.204458319904176e-05, |
|
"loss": 0.0479, |
|
"step": 56150 |
|
}, |
|
{ |
|
"epoch": 0.8071581426745372, |
|
"grad_norm": 1.996266484260559, |
|
"learning_rate": 2.2037367532386316e-05, |
|
"loss": 0.0436, |
|
"step": 56200 |
|
}, |
|
{ |
|
"epoch": 0.8078762549011159, |
|
"grad_norm": 0.07242928445339203, |
|
"learning_rate": 2.2030151865730875e-05, |
|
"loss": 0.0208, |
|
"step": 56250 |
|
}, |
|
{ |
|
"epoch": 0.8085943671276947, |
|
"grad_norm": 0.02184985764324665, |
|
"learning_rate": 2.2022936199075434e-05, |
|
"loss": 0.055, |
|
"step": 56300 |
|
}, |
|
{ |
|
"epoch": 0.8093124793542735, |
|
"grad_norm": 0.04901851341128349, |
|
"learning_rate": 2.201572053241999e-05, |
|
"loss": 0.0448, |
|
"step": 56350 |
|
}, |
|
{ |
|
"epoch": 0.8100305915808522, |
|
"grad_norm": 0.05226970463991165, |
|
"learning_rate": 2.200850486576455e-05, |
|
"loss": 0.0315, |
|
"step": 56400 |
|
}, |
|
{ |
|
"epoch": 0.810748703807431, |
|
"grad_norm": 0.006065180990844965, |
|
"learning_rate": 2.2001289199109106e-05, |
|
"loss": 0.0244, |
|
"step": 56450 |
|
}, |
|
{ |
|
"epoch": 0.8114668160340098, |
|
"grad_norm": 0.027511123567819595, |
|
"learning_rate": 2.1994073532453662e-05, |
|
"loss": 0.0454, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 0.8121849282605885, |
|
"grad_norm": 1.7325809001922607, |
|
"learning_rate": 2.1986857865798224e-05, |
|
"loss": 0.0392, |
|
"step": 56550 |
|
}, |
|
{ |
|
"epoch": 0.8129030404871673, |
|
"grad_norm": 33.937747955322266, |
|
"learning_rate": 2.197964219914278e-05, |
|
"loss": 0.0374, |
|
"step": 56600 |
|
}, |
|
{ |
|
"epoch": 0.8136211527137461, |
|
"grad_norm": 0.05114162340760231, |
|
"learning_rate": 2.1972426532487338e-05, |
|
"loss": 0.0551, |
|
"step": 56650 |
|
}, |
|
{ |
|
"epoch": 0.8143392649403248, |
|
"grad_norm": 0.015771133825182915, |
|
"learning_rate": 2.1965210865831897e-05, |
|
"loss": 0.0464, |
|
"step": 56700 |
|
}, |
|
{ |
|
"epoch": 0.8150573771669036, |
|
"grad_norm": 0.032142359763383865, |
|
"learning_rate": 2.1957995199176452e-05, |
|
"loss": 0.0581, |
|
"step": 56750 |
|
}, |
|
{ |
|
"epoch": 0.8157754893934824, |
|
"grad_norm": 8.521427154541016, |
|
"learning_rate": 2.195077953252101e-05, |
|
"loss": 0.0469, |
|
"step": 56800 |
|
}, |
|
{ |
|
"epoch": 0.8164936016200612, |
|
"grad_norm": 0.06911306083202362, |
|
"learning_rate": 2.194356386586557e-05, |
|
"loss": 0.055, |
|
"step": 56850 |
|
}, |
|
{ |
|
"epoch": 0.8172117138466399, |
|
"grad_norm": 0.028982315212488174, |
|
"learning_rate": 2.1936348199210125e-05, |
|
"loss": 0.0365, |
|
"step": 56900 |
|
}, |
|
{ |
|
"epoch": 0.8179298260732187, |
|
"grad_norm": 0.04234936460852623, |
|
"learning_rate": 2.1929132532554683e-05, |
|
"loss": 0.0474, |
|
"step": 56950 |
|
}, |
|
{ |
|
"epoch": 0.8186479382997975, |
|
"grad_norm": 48.88418960571289, |
|
"learning_rate": 2.1921916865899242e-05, |
|
"loss": 0.0318, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 0.8193660505263762, |
|
"grad_norm": 0.007455866783857346, |
|
"learning_rate": 2.1914701199243797e-05, |
|
"loss": 0.0322, |
|
"step": 57050 |
|
}, |
|
{ |
|
"epoch": 0.820084162752955, |
|
"grad_norm": 0.013227002695202827, |
|
"learning_rate": 2.1907485532588356e-05, |
|
"loss": 0.0464, |
|
"step": 57100 |
|
}, |
|
{ |
|
"epoch": 0.8208022749795338, |
|
"grad_norm": 0.017085455358028412, |
|
"learning_rate": 2.1900269865932915e-05, |
|
"loss": 0.0502, |
|
"step": 57150 |
|
}, |
|
{ |
|
"epoch": 0.8215203872061125, |
|
"grad_norm": 0.9700308442115784, |
|
"learning_rate": 2.189305419927747e-05, |
|
"loss": 0.067, |
|
"step": 57200 |
|
}, |
|
{ |
|
"epoch": 0.8222384994326913, |
|
"grad_norm": 0.015162572264671326, |
|
"learning_rate": 2.188583853262203e-05, |
|
"loss": 0.0436, |
|
"step": 57250 |
|
}, |
|
{ |
|
"epoch": 0.8229566116592701, |
|
"grad_norm": 1.4316377639770508, |
|
"learning_rate": 2.1878622865966588e-05, |
|
"loss": 0.0339, |
|
"step": 57300 |
|
}, |
|
{ |
|
"epoch": 0.8236747238858488, |
|
"grad_norm": 0.008070161566138268, |
|
"learning_rate": 2.1871407199311146e-05, |
|
"loss": 0.0334, |
|
"step": 57350 |
|
}, |
|
{ |
|
"epoch": 0.8243928361124276, |
|
"grad_norm": 0.1689612716436386, |
|
"learning_rate": 2.1864191532655702e-05, |
|
"loss": 0.0489, |
|
"step": 57400 |
|
}, |
|
{ |
|
"epoch": 0.8251109483390064, |
|
"grad_norm": 10.127570152282715, |
|
"learning_rate": 2.185697586600026e-05, |
|
"loss": 0.0928, |
|
"step": 57450 |
|
}, |
|
{ |
|
"epoch": 0.8258290605655851, |
|
"grad_norm": 0.34898841381073, |
|
"learning_rate": 2.184976019934482e-05, |
|
"loss": 0.0967, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 0.8265471727921639, |
|
"grad_norm": 0.0451822392642498, |
|
"learning_rate": 2.1842544532689375e-05, |
|
"loss": 0.0426, |
|
"step": 57550 |
|
}, |
|
{ |
|
"epoch": 0.8272652850187427, |
|
"grad_norm": 0.016039999201893806, |
|
"learning_rate": 2.1835328866033933e-05, |
|
"loss": 0.0262, |
|
"step": 57600 |
|
}, |
|
{ |
|
"epoch": 0.8279833972453216, |
|
"grad_norm": 0.01928391307592392, |
|
"learning_rate": 2.1828113199378492e-05, |
|
"loss": 0.0528, |
|
"step": 57650 |
|
}, |
|
{ |
|
"epoch": 0.8287015094719002, |
|
"grad_norm": 0.024930743500590324, |
|
"learning_rate": 2.1820897532723047e-05, |
|
"loss": 0.0286, |
|
"step": 57700 |
|
}, |
|
{ |
|
"epoch": 0.829419621698479, |
|
"grad_norm": 1.6972570419311523, |
|
"learning_rate": 2.1813681866067606e-05, |
|
"loss": 0.0308, |
|
"step": 57750 |
|
}, |
|
{ |
|
"epoch": 0.8301377339250579, |
|
"grad_norm": 3.600100040435791, |
|
"learning_rate": 2.1806466199412165e-05, |
|
"loss": 0.0596, |
|
"step": 57800 |
|
}, |
|
{ |
|
"epoch": 0.8308558461516365, |
|
"grad_norm": 0.044996269047260284, |
|
"learning_rate": 2.179925053275672e-05, |
|
"loss": 0.0271, |
|
"step": 57850 |
|
}, |
|
{ |
|
"epoch": 0.8315739583782153, |
|
"grad_norm": 0.05033883824944496, |
|
"learning_rate": 2.179203486610128e-05, |
|
"loss": 0.0512, |
|
"step": 57900 |
|
}, |
|
{ |
|
"epoch": 0.8322920706047942, |
|
"grad_norm": 0.043055735528469086, |
|
"learning_rate": 2.1784819199445838e-05, |
|
"loss": 0.0393, |
|
"step": 57950 |
|
}, |
|
{ |
|
"epoch": 0.8330101828313728, |
|
"grad_norm": 0.013648094609379768, |
|
"learning_rate": 2.1777603532790393e-05, |
|
"loss": 0.0292, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 0.8337282950579517, |
|
"grad_norm": 4.960720062255859, |
|
"learning_rate": 2.1770387866134955e-05, |
|
"loss": 0.0294, |
|
"step": 58050 |
|
}, |
|
{ |
|
"epoch": 0.8344464072845305, |
|
"grad_norm": 0.045770492404699326, |
|
"learning_rate": 2.176317219947951e-05, |
|
"loss": 0.042, |
|
"step": 58100 |
|
}, |
|
{ |
|
"epoch": 0.8351645195111091, |
|
"grad_norm": 0.026327569037675858, |
|
"learning_rate": 2.1755956532824066e-05, |
|
"loss": 0.0096, |
|
"step": 58150 |
|
}, |
|
{ |
|
"epoch": 0.835882631737688, |
|
"grad_norm": 0.015447799116373062, |
|
"learning_rate": 2.1748740866168628e-05, |
|
"loss": 0.0227, |
|
"step": 58200 |
|
}, |
|
{ |
|
"epoch": 0.8366007439642668, |
|
"grad_norm": 1.7331517934799194, |
|
"learning_rate": 2.1741525199513183e-05, |
|
"loss": 0.0297, |
|
"step": 58250 |
|
}, |
|
{ |
|
"epoch": 0.8373188561908455, |
|
"grad_norm": 0.031074518337845802, |
|
"learning_rate": 2.173430953285774e-05, |
|
"loss": 0.027, |
|
"step": 58300 |
|
}, |
|
{ |
|
"epoch": 0.8380369684174243, |
|
"grad_norm": 0.16247278451919556, |
|
"learning_rate": 2.17270938662023e-05, |
|
"loss": 0.0526, |
|
"step": 58350 |
|
}, |
|
{ |
|
"epoch": 0.8387550806440031, |
|
"grad_norm": 0.011318527162075043, |
|
"learning_rate": 2.1719878199546856e-05, |
|
"loss": 0.0411, |
|
"step": 58400 |
|
}, |
|
{ |
|
"epoch": 0.8394731928705819, |
|
"grad_norm": 0.21910272538661957, |
|
"learning_rate": 2.1712662532891415e-05, |
|
"loss": 0.0467, |
|
"step": 58450 |
|
}, |
|
{ |
|
"epoch": 0.8401913050971606, |
|
"grad_norm": 0.047498930245637894, |
|
"learning_rate": 2.1705446866235973e-05, |
|
"loss": 0.0387, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 0.8409094173237394, |
|
"grad_norm": 0.8327946662902832, |
|
"learning_rate": 2.169823119958053e-05, |
|
"loss": 0.0189, |
|
"step": 58550 |
|
}, |
|
{ |
|
"epoch": 0.8416275295503182, |
|
"grad_norm": 0.08080939203500748, |
|
"learning_rate": 2.1691015532925087e-05, |
|
"loss": 0.0261, |
|
"step": 58600 |
|
}, |
|
{ |
|
"epoch": 0.8423456417768969, |
|
"grad_norm": 0.3818748891353607, |
|
"learning_rate": 2.1683799866269646e-05, |
|
"loss": 0.0157, |
|
"step": 58650 |
|
}, |
|
{ |
|
"epoch": 0.8430637540034757, |
|
"grad_norm": 0.173528254032135, |
|
"learning_rate": 2.16765841996142e-05, |
|
"loss": 0.0432, |
|
"step": 58700 |
|
}, |
|
{ |
|
"epoch": 0.8437818662300545, |
|
"grad_norm": 7.027990818023682, |
|
"learning_rate": 2.1669368532958764e-05, |
|
"loss": 0.0362, |
|
"step": 58750 |
|
}, |
|
{ |
|
"epoch": 0.8444999784566332, |
|
"grad_norm": 0.05787297338247299, |
|
"learning_rate": 2.166215286630332e-05, |
|
"loss": 0.0425, |
|
"step": 58800 |
|
}, |
|
{ |
|
"epoch": 0.845218090683212, |
|
"grad_norm": 0.012338577769696712, |
|
"learning_rate": 2.1654937199647874e-05, |
|
"loss": 0.0314, |
|
"step": 58850 |
|
}, |
|
{ |
|
"epoch": 0.8459362029097908, |
|
"grad_norm": 0.25181296467781067, |
|
"learning_rate": 2.1647721532992436e-05, |
|
"loss": 0.0335, |
|
"step": 58900 |
|
}, |
|
{ |
|
"epoch": 0.8466543151363695, |
|
"grad_norm": 3.5599021911621094, |
|
"learning_rate": 2.164050586633699e-05, |
|
"loss": 0.046, |
|
"step": 58950 |
|
}, |
|
{ |
|
"epoch": 0.8473724273629483, |
|
"grad_norm": 33.42082595825195, |
|
"learning_rate": 2.1633434513014657e-05, |
|
"loss": 0.0575, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 0.8480905395895271, |
|
"grad_norm": 17.722803115844727, |
|
"learning_rate": 2.1626218846359216e-05, |
|
"loss": 0.024, |
|
"step": 59050 |
|
}, |
|
{ |
|
"epoch": 0.8488086518161059, |
|
"grad_norm": 0.07347019761800766, |
|
"learning_rate": 2.1619003179703775e-05, |
|
"loss": 0.0199, |
|
"step": 59100 |
|
}, |
|
{ |
|
"epoch": 0.8495267640426846, |
|
"grad_norm": 0.017910439521074295, |
|
"learning_rate": 2.161178751304833e-05, |
|
"loss": 0.0336, |
|
"step": 59150 |
|
}, |
|
{ |
|
"epoch": 0.8502448762692634, |
|
"grad_norm": 0.014987743459641933, |
|
"learning_rate": 2.160457184639289e-05, |
|
"loss": 0.0373, |
|
"step": 59200 |
|
}, |
|
{ |
|
"epoch": 0.8509629884958422, |
|
"grad_norm": 0.013330746442079544, |
|
"learning_rate": 2.1597500493070555e-05, |
|
"loss": 0.0231, |
|
"step": 59250 |
|
}, |
|
{ |
|
"epoch": 0.8516811007224209, |
|
"grad_norm": 0.2872665226459503, |
|
"learning_rate": 2.1590284826415113e-05, |
|
"loss": 0.0446, |
|
"step": 59300 |
|
}, |
|
{ |
|
"epoch": 0.8523992129489997, |
|
"grad_norm": 0.04101574420928955, |
|
"learning_rate": 2.1583069159759672e-05, |
|
"loss": 0.0475, |
|
"step": 59350 |
|
}, |
|
{ |
|
"epoch": 0.8531173251755785, |
|
"grad_norm": 0.025700349360704422, |
|
"learning_rate": 2.1575853493104228e-05, |
|
"loss": 0.042, |
|
"step": 59400 |
|
}, |
|
{ |
|
"epoch": 0.8538354374021572, |
|
"grad_norm": 0.022509241476655006, |
|
"learning_rate": 2.1568637826448786e-05, |
|
"loss": 0.036, |
|
"step": 59450 |
|
}, |
|
{ |
|
"epoch": 0.854553549628736, |
|
"grad_norm": 0.013904242776334286, |
|
"learning_rate": 2.1561422159793345e-05, |
|
"loss": 0.0276, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 0.8552716618553148, |
|
"grad_norm": 0.025636285543441772, |
|
"learning_rate": 2.15542064931379e-05, |
|
"loss": 0.0475, |
|
"step": 59550 |
|
}, |
|
{ |
|
"epoch": 0.8559897740818935, |
|
"grad_norm": 0.13354450464248657, |
|
"learning_rate": 2.154699082648246e-05, |
|
"loss": 0.0432, |
|
"step": 59600 |
|
}, |
|
{ |
|
"epoch": 0.8567078863084723, |
|
"grad_norm": 0.057024553418159485, |
|
"learning_rate": 2.1539775159827018e-05, |
|
"loss": 0.0476, |
|
"step": 59650 |
|
}, |
|
{ |
|
"epoch": 0.8574259985350511, |
|
"grad_norm": 0.02197224833071232, |
|
"learning_rate": 2.1532559493171573e-05, |
|
"loss": 0.0551, |
|
"step": 59700 |
|
}, |
|
{ |
|
"epoch": 0.8581441107616298, |
|
"grad_norm": 8.308976173400879, |
|
"learning_rate": 2.1525343826516132e-05, |
|
"loss": 0.0309, |
|
"step": 59750 |
|
}, |
|
{ |
|
"epoch": 0.8588622229882086, |
|
"grad_norm": 0.030680665746331215, |
|
"learning_rate": 2.151812815986069e-05, |
|
"loss": 0.0315, |
|
"step": 59800 |
|
}, |
|
{ |
|
"epoch": 0.8595803352147874, |
|
"grad_norm": 0.03317005932331085, |
|
"learning_rate": 2.151091249320525e-05, |
|
"loss": 0.0189, |
|
"step": 59850 |
|
}, |
|
{ |
|
"epoch": 0.8602984474413662, |
|
"grad_norm": 0.0691901445388794, |
|
"learning_rate": 2.1503696826549805e-05, |
|
"loss": 0.0566, |
|
"step": 59900 |
|
}, |
|
{ |
|
"epoch": 0.8610165596679449, |
|
"grad_norm": 2.728785991668701, |
|
"learning_rate": 2.1496481159894363e-05, |
|
"loss": 0.0303, |
|
"step": 59950 |
|
}, |
|
{ |
|
"epoch": 0.8617346718945237, |
|
"grad_norm": 110.03862762451172, |
|
"learning_rate": 2.1489265493238922e-05, |
|
"loss": 0.0477, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.8624527841211025, |
|
"grad_norm": 0.3772555887699127, |
|
"learning_rate": 2.1482338453249695e-05, |
|
"loss": 0.0699, |
|
"step": 60050 |
|
}, |
|
{ |
|
"epoch": 0.8631708963476812, |
|
"grad_norm": 0.03029673732817173, |
|
"learning_rate": 2.1475122786594254e-05, |
|
"loss": 0.0445, |
|
"step": 60100 |
|
}, |
|
{ |
|
"epoch": 0.86388900857426, |
|
"grad_norm": 0.059018220752477646, |
|
"learning_rate": 2.1467907119938812e-05, |
|
"loss": 0.0407, |
|
"step": 60150 |
|
}, |
|
{ |
|
"epoch": 0.8646071208008388, |
|
"grad_norm": 0.018615849316120148, |
|
"learning_rate": 2.1460691453283368e-05, |
|
"loss": 0.0241, |
|
"step": 60200 |
|
}, |
|
{ |
|
"epoch": 0.8653252330274175, |
|
"grad_norm": 0.048536475747823715, |
|
"learning_rate": 2.145347578662793e-05, |
|
"loss": 0.0361, |
|
"step": 60250 |
|
}, |
|
{ |
|
"epoch": 0.8660433452539963, |
|
"grad_norm": 0.02451617270708084, |
|
"learning_rate": 2.1446260119972485e-05, |
|
"loss": 0.0208, |
|
"step": 60300 |
|
}, |
|
{ |
|
"epoch": 0.8667614574805751, |
|
"grad_norm": 43.35445022583008, |
|
"learning_rate": 2.143904445331704e-05, |
|
"loss": 0.0227, |
|
"step": 60350 |
|
}, |
|
{ |
|
"epoch": 0.8674795697071538, |
|
"grad_norm": 0.02983827330172062, |
|
"learning_rate": 2.1431828786661603e-05, |
|
"loss": 0.0637, |
|
"step": 60400 |
|
}, |
|
{ |
|
"epoch": 0.8681976819337326, |
|
"grad_norm": 0.034458402544260025, |
|
"learning_rate": 2.1424613120006158e-05, |
|
"loss": 0.0372, |
|
"step": 60450 |
|
}, |
|
{ |
|
"epoch": 0.8689157941603114, |
|
"grad_norm": 3.9216103553771973, |
|
"learning_rate": 2.1417397453350713e-05, |
|
"loss": 0.0657, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 0.8696339063868902, |
|
"grad_norm": 0.036895278841257095, |
|
"learning_rate": 2.1410181786695275e-05, |
|
"loss": 0.0248, |
|
"step": 60550 |
|
}, |
|
{ |
|
"epoch": 0.8703520186134689, |
|
"grad_norm": 0.026578128337860107, |
|
"learning_rate": 2.140296612003983e-05, |
|
"loss": 0.0368, |
|
"step": 60600 |
|
}, |
|
{ |
|
"epoch": 0.8710701308400477, |
|
"grad_norm": 0.0387389212846756, |
|
"learning_rate": 2.1395750453384386e-05, |
|
"loss": 0.0618, |
|
"step": 60650 |
|
}, |
|
{ |
|
"epoch": 0.8717882430666265, |
|
"grad_norm": 0.021623022854328156, |
|
"learning_rate": 2.1388534786728948e-05, |
|
"loss": 0.0489, |
|
"step": 60700 |
|
}, |
|
{ |
|
"epoch": 0.8725063552932052, |
|
"grad_norm": 0.05269308388233185, |
|
"learning_rate": 2.1381319120073503e-05, |
|
"loss": 0.0339, |
|
"step": 60750 |
|
}, |
|
{ |
|
"epoch": 0.873224467519784, |
|
"grad_norm": 0.3148886561393738, |
|
"learning_rate": 2.1374103453418062e-05, |
|
"loss": 0.0268, |
|
"step": 60800 |
|
}, |
|
{ |
|
"epoch": 0.8739425797463628, |
|
"grad_norm": 0.022542864084243774, |
|
"learning_rate": 2.136688778676262e-05, |
|
"loss": 0.0567, |
|
"step": 60850 |
|
}, |
|
{ |
|
"epoch": 0.8746606919729415, |
|
"grad_norm": 0.04053495079278946, |
|
"learning_rate": 2.1359672120107176e-05, |
|
"loss": 0.0141, |
|
"step": 60900 |
|
}, |
|
{ |
|
"epoch": 0.8753788041995203, |
|
"grad_norm": 0.031727734953165054, |
|
"learning_rate": 2.1352456453451735e-05, |
|
"loss": 0.0357, |
|
"step": 60950 |
|
}, |
|
{ |
|
"epoch": 0.8760969164260991, |
|
"grad_norm": 0.019135266542434692, |
|
"learning_rate": 2.1345240786796294e-05, |
|
"loss": 0.0729, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 0.8768150286526778, |
|
"grad_norm": 0.0378669910132885, |
|
"learning_rate": 2.133802512014085e-05, |
|
"loss": 0.0322, |
|
"step": 61050 |
|
}, |
|
{ |
|
"epoch": 0.8775331408792566, |
|
"grad_norm": 0.7534425258636475, |
|
"learning_rate": 2.1330809453485408e-05, |
|
"loss": 0.0449, |
|
"step": 61100 |
|
}, |
|
{ |
|
"epoch": 0.8782512531058354, |
|
"grad_norm": 0.028031494468450546, |
|
"learning_rate": 2.1323593786829966e-05, |
|
"loss": 0.0164, |
|
"step": 61150 |
|
}, |
|
{ |
|
"epoch": 0.8789693653324141, |
|
"grad_norm": 1.30186128616333, |
|
"learning_rate": 2.1316378120174522e-05, |
|
"loss": 0.0094, |
|
"step": 61200 |
|
}, |
|
{ |
|
"epoch": 0.8796874775589929, |
|
"grad_norm": 0.01870095543563366, |
|
"learning_rate": 2.130916245351908e-05, |
|
"loss": 0.0413, |
|
"step": 61250 |
|
}, |
|
{ |
|
"epoch": 0.8804055897855717, |
|
"grad_norm": 0.010914398357272148, |
|
"learning_rate": 2.130194678686364e-05, |
|
"loss": 0.0453, |
|
"step": 61300 |
|
}, |
|
{ |
|
"epoch": 0.8811237020121505, |
|
"grad_norm": 0.02804506942629814, |
|
"learning_rate": 2.1294731120208195e-05, |
|
"loss": 0.0662, |
|
"step": 61350 |
|
}, |
|
{ |
|
"epoch": 0.8818418142387292, |
|
"grad_norm": 0.022548319771885872, |
|
"learning_rate": 2.1287515453552753e-05, |
|
"loss": 0.0409, |
|
"step": 61400 |
|
}, |
|
{ |
|
"epoch": 0.882559926465308, |
|
"grad_norm": 5.709629058837891, |
|
"learning_rate": 2.1280299786897312e-05, |
|
"loss": 0.0509, |
|
"step": 61450 |
|
}, |
|
{ |
|
"epoch": 0.8832780386918868, |
|
"grad_norm": 1.946844220161438, |
|
"learning_rate": 2.127308412024187e-05, |
|
"loss": 0.0619, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 0.8839961509184655, |
|
"grad_norm": 0.020764781162142754, |
|
"learning_rate": 2.126586845358643e-05, |
|
"loss": 0.02, |
|
"step": 61550 |
|
}, |
|
{ |
|
"epoch": 0.8847142631450443, |
|
"grad_norm": 0.03979656100273132, |
|
"learning_rate": 2.1258652786930985e-05, |
|
"loss": 0.0477, |
|
"step": 61600 |
|
}, |
|
{ |
|
"epoch": 0.8854323753716231, |
|
"grad_norm": 1.5372118949890137, |
|
"learning_rate": 2.1251437120275544e-05, |
|
"loss": 0.0439, |
|
"step": 61650 |
|
}, |
|
{ |
|
"epoch": 0.8861504875982018, |
|
"grad_norm": 2.8347742557525635, |
|
"learning_rate": 2.1244221453620102e-05, |
|
"loss": 0.0552, |
|
"step": 61700 |
|
}, |
|
{ |
|
"epoch": 0.8868685998247806, |
|
"grad_norm": 0.5707604885101318, |
|
"learning_rate": 2.1237005786964658e-05, |
|
"loss": 0.0492, |
|
"step": 61750 |
|
}, |
|
{ |
|
"epoch": 0.8875867120513594, |
|
"grad_norm": 0.018220653757452965, |
|
"learning_rate": 2.1229790120309216e-05, |
|
"loss": 0.0215, |
|
"step": 61800 |
|
}, |
|
{ |
|
"epoch": 0.8883048242779381, |
|
"grad_norm": 0.025390001013875008, |
|
"learning_rate": 2.1222574453653775e-05, |
|
"loss": 0.0496, |
|
"step": 61850 |
|
}, |
|
{ |
|
"epoch": 0.8890229365045169, |
|
"grad_norm": 0.015123003162443638, |
|
"learning_rate": 2.121535878699833e-05, |
|
"loss": 0.0103, |
|
"step": 61900 |
|
}, |
|
{ |
|
"epoch": 0.8897410487310957, |
|
"grad_norm": 0.02207750454545021, |
|
"learning_rate": 2.120814312034289e-05, |
|
"loss": 0.0303, |
|
"step": 61950 |
|
}, |
|
{ |
|
"epoch": 0.8904591609576744, |
|
"grad_norm": 0.30433520674705505, |
|
"learning_rate": 2.1200927453687448e-05, |
|
"loss": 0.0432, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 0.8911772731842532, |
|
"grad_norm": 0.3474635183811188, |
|
"learning_rate": 2.1193711787032003e-05, |
|
"loss": 0.0413, |
|
"step": 62050 |
|
}, |
|
{ |
|
"epoch": 0.891895385410832, |
|
"grad_norm": 0.011259687133133411, |
|
"learning_rate": 2.1186496120376562e-05, |
|
"loss": 0.0248, |
|
"step": 62100 |
|
}, |
|
{ |
|
"epoch": 0.8926134976374108, |
|
"grad_norm": 0.32533684372901917, |
|
"learning_rate": 2.117928045372112e-05, |
|
"loss": 0.0463, |
|
"step": 62150 |
|
}, |
|
{ |
|
"epoch": 0.8933316098639895, |
|
"grad_norm": 12.774140357971191, |
|
"learning_rate": 2.117206478706568e-05, |
|
"loss": 0.0858, |
|
"step": 62200 |
|
}, |
|
{ |
|
"epoch": 0.8940497220905683, |
|
"grad_norm": 1.7531408071517944, |
|
"learning_rate": 2.1164849120410235e-05, |
|
"loss": 0.0398, |
|
"step": 62250 |
|
}, |
|
{ |
|
"epoch": 0.8947678343171471, |
|
"grad_norm": 0.021759033203125, |
|
"learning_rate": 2.1157633453754793e-05, |
|
"loss": 0.0274, |
|
"step": 62300 |
|
}, |
|
{ |
|
"epoch": 0.8954859465437258, |
|
"grad_norm": 0.01539881806820631, |
|
"learning_rate": 2.1150417787099352e-05, |
|
"loss": 0.0437, |
|
"step": 62350 |
|
}, |
|
{ |
|
"epoch": 0.8962040587703046, |
|
"grad_norm": 0.06527174264192581, |
|
"learning_rate": 2.1143202120443907e-05, |
|
"loss": 0.0316, |
|
"step": 62400 |
|
}, |
|
{ |
|
"epoch": 0.8969221709968834, |
|
"grad_norm": 0.07329878211021423, |
|
"learning_rate": 2.1135986453788466e-05, |
|
"loss": 0.0175, |
|
"step": 62450 |
|
}, |
|
{ |
|
"epoch": 0.8976402832234621, |
|
"grad_norm": 1.6692662239074707, |
|
"learning_rate": 2.1128770787133025e-05, |
|
"loss": 0.0746, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 0.8983583954500409, |
|
"grad_norm": 0.054094549268484116, |
|
"learning_rate": 2.112155512047758e-05, |
|
"loss": 0.0319, |
|
"step": 62550 |
|
}, |
|
{ |
|
"epoch": 0.8990765076766197, |
|
"grad_norm": 0.02748558297753334, |
|
"learning_rate": 2.111433945382214e-05, |
|
"loss": 0.0273, |
|
"step": 62600 |
|
}, |
|
{ |
|
"epoch": 0.8997946199031984, |
|
"grad_norm": 0.05098741501569748, |
|
"learning_rate": 2.1107123787166698e-05, |
|
"loss": 0.0551, |
|
"step": 62650 |
|
}, |
|
{ |
|
"epoch": 0.9005127321297772, |
|
"grad_norm": 0.03316742926836014, |
|
"learning_rate": 2.1099908120511253e-05, |
|
"loss": 0.0349, |
|
"step": 62700 |
|
}, |
|
{ |
|
"epoch": 0.901230844356356, |
|
"grad_norm": 0.16046391427516937, |
|
"learning_rate": 2.1092692453855812e-05, |
|
"loss": 0.022, |
|
"step": 62750 |
|
}, |
|
{ |
|
"epoch": 0.9019489565829348, |
|
"grad_norm": 0.6568982601165771, |
|
"learning_rate": 2.108547678720037e-05, |
|
"loss": 0.0152, |
|
"step": 62800 |
|
}, |
|
{ |
|
"epoch": 0.9026670688095135, |
|
"grad_norm": 0.017578423023223877, |
|
"learning_rate": 2.1078261120544926e-05, |
|
"loss": 0.042, |
|
"step": 62850 |
|
}, |
|
{ |
|
"epoch": 0.9033851810360923, |
|
"grad_norm": 1.6092095375061035, |
|
"learning_rate": 2.1071045453889488e-05, |
|
"loss": 0.0478, |
|
"step": 62900 |
|
}, |
|
{ |
|
"epoch": 0.9041032932626711, |
|
"grad_norm": 0.12522783875465393, |
|
"learning_rate": 2.1063829787234043e-05, |
|
"loss": 0.045, |
|
"step": 62950 |
|
}, |
|
{ |
|
"epoch": 0.9048214054892498, |
|
"grad_norm": 0.016538824886083603, |
|
"learning_rate": 2.10566141205786e-05, |
|
"loss": 0.0262, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 0.9055395177158286, |
|
"grad_norm": 0.019631749019026756, |
|
"learning_rate": 2.104939845392316e-05, |
|
"loss": 0.0338, |
|
"step": 63050 |
|
}, |
|
{ |
|
"epoch": 0.9062576299424074, |
|
"grad_norm": 2.552657127380371, |
|
"learning_rate": 2.1042182787267716e-05, |
|
"loss": 0.0543, |
|
"step": 63100 |
|
}, |
|
{ |
|
"epoch": 0.9069757421689861, |
|
"grad_norm": 8.885793685913086, |
|
"learning_rate": 2.103496712061227e-05, |
|
"loss": 0.0543, |
|
"step": 63150 |
|
}, |
|
{ |
|
"epoch": 0.9076938543955649, |
|
"grad_norm": 0.014104747213423252, |
|
"learning_rate": 2.1027751453956833e-05, |
|
"loss": 0.0587, |
|
"step": 63200 |
|
}, |
|
{ |
|
"epoch": 0.9084119666221437, |
|
"grad_norm": 0.07887265086174011, |
|
"learning_rate": 2.102053578730139e-05, |
|
"loss": 0.0164, |
|
"step": 63250 |
|
}, |
|
{ |
|
"epoch": 0.9091300788487224, |
|
"grad_norm": 0.018778778612613678, |
|
"learning_rate": 2.1013320120645944e-05, |
|
"loss": 0.027, |
|
"step": 63300 |
|
}, |
|
{ |
|
"epoch": 0.9098481910753012, |
|
"grad_norm": 0.05986733362078667, |
|
"learning_rate": 2.1006104453990506e-05, |
|
"loss": 0.0321, |
|
"step": 63350 |
|
}, |
|
{ |
|
"epoch": 0.91056630330188, |
|
"grad_norm": 0.03736149147152901, |
|
"learning_rate": 2.099888878733506e-05, |
|
"loss": 0.0349, |
|
"step": 63400 |
|
}, |
|
{ |
|
"epoch": 0.9112844155284587, |
|
"grad_norm": 9.391321182250977, |
|
"learning_rate": 2.099167312067962e-05, |
|
"loss": 0.0424, |
|
"step": 63450 |
|
}, |
|
{ |
|
"epoch": 0.9120025277550375, |
|
"grad_norm": 0.01603916846215725, |
|
"learning_rate": 2.098445745402418e-05, |
|
"loss": 0.0337, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 0.9127206399816163, |
|
"grad_norm": 5.215839385986328, |
|
"learning_rate": 2.0977241787368734e-05, |
|
"loss": 0.0306, |
|
"step": 63550 |
|
}, |
|
{ |
|
"epoch": 0.9134387522081951, |
|
"grad_norm": 0.03886263445019722, |
|
"learning_rate": 2.0970026120713296e-05, |
|
"loss": 0.0331, |
|
"step": 63600 |
|
}, |
|
{ |
|
"epoch": 0.9141568644347738, |
|
"grad_norm": 0.3178030848503113, |
|
"learning_rate": 2.0962810454057852e-05, |
|
"loss": 0.0417, |
|
"step": 63650 |
|
}, |
|
{ |
|
"epoch": 0.9148749766613526, |
|
"grad_norm": 0.026981327682733536, |
|
"learning_rate": 2.0955594787402407e-05, |
|
"loss": 0.043, |
|
"step": 63700 |
|
}, |
|
{ |
|
"epoch": 0.9155930888879314, |
|
"grad_norm": 0.019250132143497467, |
|
"learning_rate": 2.094837912074697e-05, |
|
"loss": 0.0451, |
|
"step": 63750 |
|
}, |
|
{ |
|
"epoch": 0.9163112011145101, |
|
"grad_norm": 0.016561010852456093, |
|
"learning_rate": 2.0941163454091525e-05, |
|
"loss": 0.0355, |
|
"step": 63800 |
|
}, |
|
{ |
|
"epoch": 0.917029313341089, |
|
"grad_norm": 0.011019432917237282, |
|
"learning_rate": 2.093394778743608e-05, |
|
"loss": 0.0514, |
|
"step": 63850 |
|
}, |
|
{ |
|
"epoch": 0.9177474255676678, |
|
"grad_norm": 2.2995824813842773, |
|
"learning_rate": 2.0926732120780642e-05, |
|
"loss": 0.0302, |
|
"step": 63900 |
|
}, |
|
{ |
|
"epoch": 0.9184655377942464, |
|
"grad_norm": 0.013754851184785366, |
|
"learning_rate": 2.0919516454125197e-05, |
|
"loss": 0.0452, |
|
"step": 63950 |
|
}, |
|
{ |
|
"epoch": 0.9191836500208252, |
|
"grad_norm": 0.028004441410303116, |
|
"learning_rate": 2.0912300787469753e-05, |
|
"loss": 0.0459, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.919901762247404, |
|
"grad_norm": 0.016767442226409912, |
|
"learning_rate": 2.0905085120814315e-05, |
|
"loss": 0.0177, |
|
"step": 64050 |
|
}, |
|
{ |
|
"epoch": 0.9206198744739827, |
|
"grad_norm": 3.818044900894165, |
|
"learning_rate": 2.089801376749198e-05, |
|
"loss": 0.064, |
|
"step": 64100 |
|
}, |
|
{ |
|
"epoch": 0.9213379867005616, |
|
"grad_norm": 0.04125185310840607, |
|
"learning_rate": 2.0890798100836536e-05, |
|
"loss": 0.0469, |
|
"step": 64150 |
|
}, |
|
{ |
|
"epoch": 0.9220560989271404, |
|
"grad_norm": 0.02808566577732563, |
|
"learning_rate": 2.0883582434181095e-05, |
|
"loss": 0.0454, |
|
"step": 64200 |
|
}, |
|
{ |
|
"epoch": 0.922774211153719, |
|
"grad_norm": 0.03282586857676506, |
|
"learning_rate": 2.0876366767525653e-05, |
|
"loss": 0.0288, |
|
"step": 64250 |
|
}, |
|
{ |
|
"epoch": 0.9234923233802979, |
|
"grad_norm": 0.018743840977549553, |
|
"learning_rate": 2.086915110087021e-05, |
|
"loss": 0.046, |
|
"step": 64300 |
|
}, |
|
{ |
|
"epoch": 0.9242104356068767, |
|
"grad_norm": 0.049901194870471954, |
|
"learning_rate": 2.0861935434214767e-05, |
|
"loss": 0.0595, |
|
"step": 64350 |
|
}, |
|
{ |
|
"epoch": 0.9249285478334555, |
|
"grad_norm": 34.75381851196289, |
|
"learning_rate": 2.0854719767559326e-05, |
|
"loss": 0.0414, |
|
"step": 64400 |
|
}, |
|
{ |
|
"epoch": 0.9256466600600342, |
|
"grad_norm": 0.02926110103726387, |
|
"learning_rate": 2.084750410090388e-05, |
|
"loss": 0.0481, |
|
"step": 64450 |
|
}, |
|
{ |
|
"epoch": 0.926364772286613, |
|
"grad_norm": 0.06628941744565964, |
|
"learning_rate": 2.0840288434248444e-05, |
|
"loss": 0.0238, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 0.9270828845131918, |
|
"grad_norm": 0.5184805393218994, |
|
"learning_rate": 2.0833072767593e-05, |
|
"loss": 0.0169, |
|
"step": 64550 |
|
}, |
|
{ |
|
"epoch": 0.9278009967397705, |
|
"grad_norm": 0.03416599705815315, |
|
"learning_rate": 2.0825857100937554e-05, |
|
"loss": 0.0477, |
|
"step": 64600 |
|
}, |
|
{ |
|
"epoch": 0.9285191089663493, |
|
"grad_norm": 0.10312914848327637, |
|
"learning_rate": 2.0818641434282116e-05, |
|
"loss": 0.0476, |
|
"step": 64650 |
|
}, |
|
{ |
|
"epoch": 0.9292372211929281, |
|
"grad_norm": 0.9014744758605957, |
|
"learning_rate": 2.0811425767626672e-05, |
|
"loss": 0.0496, |
|
"step": 64700 |
|
}, |
|
{ |
|
"epoch": 0.9299553334195068, |
|
"grad_norm": 0.015887757763266563, |
|
"learning_rate": 2.0804210100971227e-05, |
|
"loss": 0.0311, |
|
"step": 64750 |
|
}, |
|
{ |
|
"epoch": 0.9306734456460856, |
|
"grad_norm": 0.5302422642707825, |
|
"learning_rate": 2.079699443431579e-05, |
|
"loss": 0.0492, |
|
"step": 64800 |
|
}, |
|
{ |
|
"epoch": 0.9313915578726644, |
|
"grad_norm": 0.29680338501930237, |
|
"learning_rate": 2.0789778767660344e-05, |
|
"loss": 0.0251, |
|
"step": 64850 |
|
}, |
|
{ |
|
"epoch": 0.9321096700992431, |
|
"grad_norm": 0.003984624985605478, |
|
"learning_rate": 2.07825631010049e-05, |
|
"loss": 0.0304, |
|
"step": 64900 |
|
}, |
|
{ |
|
"epoch": 0.9328277823258219, |
|
"grad_norm": 0.3101426064968109, |
|
"learning_rate": 2.0775347434349462e-05, |
|
"loss": 0.0151, |
|
"step": 64950 |
|
}, |
|
{ |
|
"epoch": 0.9335458945524007, |
|
"grad_norm": 0.15551728010177612, |
|
"learning_rate": 2.0768131767694017e-05, |
|
"loss": 0.0299, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 0.9342640067789795, |
|
"grad_norm": 0.03081173077225685, |
|
"learning_rate": 2.0760916101038573e-05, |
|
"loss": 0.0626, |
|
"step": 65050 |
|
}, |
|
{ |
|
"epoch": 0.9349821190055582, |
|
"grad_norm": 0.07034569978713989, |
|
"learning_rate": 2.0753700434383135e-05, |
|
"loss": 0.0275, |
|
"step": 65100 |
|
}, |
|
{ |
|
"epoch": 0.935700231232137, |
|
"grad_norm": 21.155750274658203, |
|
"learning_rate": 2.074648476772769e-05, |
|
"loss": 0.0584, |
|
"step": 65150 |
|
}, |
|
{ |
|
"epoch": 0.9364183434587158, |
|
"grad_norm": 0.02575441636145115, |
|
"learning_rate": 2.073926910107225e-05, |
|
"loss": 0.0262, |
|
"step": 65200 |
|
}, |
|
{ |
|
"epoch": 0.9371364556852945, |
|
"grad_norm": 0.5820505023002625, |
|
"learning_rate": 2.0732053434416808e-05, |
|
"loss": 0.0502, |
|
"step": 65250 |
|
}, |
|
{ |
|
"epoch": 0.9378545679118733, |
|
"grad_norm": 0.1326725035905838, |
|
"learning_rate": 2.0724837767761363e-05, |
|
"loss": 0.026, |
|
"step": 65300 |
|
}, |
|
{ |
|
"epoch": 0.9385726801384521, |
|
"grad_norm": 0.29643315076828003, |
|
"learning_rate": 2.071762210110592e-05, |
|
"loss": 0.0224, |
|
"step": 65350 |
|
}, |
|
{ |
|
"epoch": 0.9392907923650308, |
|
"grad_norm": 0.006369379349052906, |
|
"learning_rate": 2.071040643445048e-05, |
|
"loss": 0.0427, |
|
"step": 65400 |
|
}, |
|
{ |
|
"epoch": 0.9400089045916096, |
|
"grad_norm": 0.052178192883729935, |
|
"learning_rate": 2.0703190767795036e-05, |
|
"loss": 0.0478, |
|
"step": 65450 |
|
}, |
|
{ |
|
"epoch": 0.9407270168181884, |
|
"grad_norm": 0.07716100662946701, |
|
"learning_rate": 2.0695975101139594e-05, |
|
"loss": 0.0271, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 0.9414451290447671, |
|
"grad_norm": 0.019032573327422142, |
|
"learning_rate": 2.0688759434484153e-05, |
|
"loss": 0.0303, |
|
"step": 65550 |
|
}, |
|
{ |
|
"epoch": 0.9421632412713459, |
|
"grad_norm": 0.01876581460237503, |
|
"learning_rate": 2.068154376782871e-05, |
|
"loss": 0.0201, |
|
"step": 65600 |
|
}, |
|
{ |
|
"epoch": 0.9428813534979247, |
|
"grad_norm": 0.013573722913861275, |
|
"learning_rate": 2.0674328101173267e-05, |
|
"loss": 0.0584, |
|
"step": 65650 |
|
}, |
|
{ |
|
"epoch": 0.9435994657245034, |
|
"grad_norm": 0.0182351041585207, |
|
"learning_rate": 2.0667112434517826e-05, |
|
"loss": 0.0464, |
|
"step": 65700 |
|
}, |
|
{ |
|
"epoch": 0.9443175779510822, |
|
"grad_norm": 0.044160228222608566, |
|
"learning_rate": 2.065989676786238e-05, |
|
"loss": 0.0364, |
|
"step": 65750 |
|
}, |
|
{ |
|
"epoch": 0.945035690177661, |
|
"grad_norm": 0.17557665705680847, |
|
"learning_rate": 2.0652681101206943e-05, |
|
"loss": 0.0362, |
|
"step": 65800 |
|
}, |
|
{ |
|
"epoch": 0.9457538024042398, |
|
"grad_norm": 0.008380572311580181, |
|
"learning_rate": 2.06454654345515e-05, |
|
"loss": 0.0328, |
|
"step": 65850 |
|
}, |
|
{ |
|
"epoch": 0.9464719146308185, |
|
"grad_norm": 0.020001381635665894, |
|
"learning_rate": 2.0638249767896057e-05, |
|
"loss": 0.0243, |
|
"step": 65900 |
|
}, |
|
{ |
|
"epoch": 0.9471900268573973, |
|
"grad_norm": 0.14650805294513702, |
|
"learning_rate": 2.0631034101240616e-05, |
|
"loss": 0.0409, |
|
"step": 65950 |
|
}, |
|
{ |
|
"epoch": 0.9479081390839761, |
|
"grad_norm": 0.016235264018177986, |
|
"learning_rate": 2.062381843458517e-05, |
|
"loss": 0.0453, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 0.9486262513105548, |
|
"grad_norm": 2.048232316970825, |
|
"learning_rate": 2.061660276792973e-05, |
|
"loss": 0.0701, |
|
"step": 66050 |
|
}, |
|
{ |
|
"epoch": 0.9493443635371336, |
|
"grad_norm": 0.05848151445388794, |
|
"learning_rate": 2.060938710127429e-05, |
|
"loss": 0.0345, |
|
"step": 66100 |
|
}, |
|
{ |
|
"epoch": 0.9500624757637124, |
|
"grad_norm": 0.03294019028544426, |
|
"learning_rate": 2.0602171434618844e-05, |
|
"loss": 0.0328, |
|
"step": 66150 |
|
}, |
|
{ |
|
"epoch": 0.9507805879902911, |
|
"grad_norm": 0.01830822415649891, |
|
"learning_rate": 2.0594955767963403e-05, |
|
"loss": 0.0138, |
|
"step": 66200 |
|
}, |
|
{ |
|
"epoch": 0.9514987002168699, |
|
"grad_norm": 0.01788957603275776, |
|
"learning_rate": 2.058774010130796e-05, |
|
"loss": 0.015, |
|
"step": 66250 |
|
}, |
|
{ |
|
"epoch": 0.9522168124434487, |
|
"grad_norm": 0.6261688470840454, |
|
"learning_rate": 2.0580524434652517e-05, |
|
"loss": 0.0485, |
|
"step": 66300 |
|
}, |
|
{ |
|
"epoch": 0.9529349246700274, |
|
"grad_norm": 0.030795125290751457, |
|
"learning_rate": 2.0573308767997076e-05, |
|
"loss": 0.0503, |
|
"step": 66350 |
|
}, |
|
{ |
|
"epoch": 0.9536530368966062, |
|
"grad_norm": 0.02338263764977455, |
|
"learning_rate": 2.0566093101341634e-05, |
|
"loss": 0.0448, |
|
"step": 66400 |
|
}, |
|
{ |
|
"epoch": 0.954371149123185, |
|
"grad_norm": 0.39836427569389343, |
|
"learning_rate": 2.055887743468619e-05, |
|
"loss": 0.0601, |
|
"step": 66450 |
|
}, |
|
{ |
|
"epoch": 0.9550892613497638, |
|
"grad_norm": 0.0230514295399189, |
|
"learning_rate": 2.055166176803075e-05, |
|
"loss": 0.0371, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 0.9558073735763425, |
|
"grad_norm": 0.02001722902059555, |
|
"learning_rate": 2.0544446101375307e-05, |
|
"loss": 0.0435, |
|
"step": 66550 |
|
}, |
|
{ |
|
"epoch": 0.9565254858029213, |
|
"grad_norm": 0.03101685456931591, |
|
"learning_rate": 2.0537230434719866e-05, |
|
"loss": 0.0594, |
|
"step": 66600 |
|
}, |
|
{ |
|
"epoch": 0.9572435980295001, |
|
"grad_norm": 0.02627638168632984, |
|
"learning_rate": 2.053001476806442e-05, |
|
"loss": 0.0273, |
|
"step": 66650 |
|
}, |
|
{ |
|
"epoch": 0.9579617102560788, |
|
"grad_norm": 0.02331429533660412, |
|
"learning_rate": 2.052279910140898e-05, |
|
"loss": 0.0452, |
|
"step": 66700 |
|
}, |
|
{ |
|
"epoch": 0.9586798224826576, |
|
"grad_norm": 0.04160163179039955, |
|
"learning_rate": 2.051558343475354e-05, |
|
"loss": 0.031, |
|
"step": 66750 |
|
}, |
|
{ |
|
"epoch": 0.9593979347092364, |
|
"grad_norm": 123.35272979736328, |
|
"learning_rate": 2.0508367768098094e-05, |
|
"loss": 0.0523, |
|
"step": 66800 |
|
}, |
|
{ |
|
"epoch": 0.9601160469358151, |
|
"grad_norm": 0.07898234575986862, |
|
"learning_rate": 2.0501152101442653e-05, |
|
"loss": 0.0292, |
|
"step": 66850 |
|
}, |
|
{ |
|
"epoch": 0.9608341591623939, |
|
"grad_norm": 1.0723378658294678, |
|
"learning_rate": 2.049393643478721e-05, |
|
"loss": 0.0563, |
|
"step": 66900 |
|
}, |
|
{ |
|
"epoch": 0.9615522713889727, |
|
"grad_norm": 0.06465094536542892, |
|
"learning_rate": 2.0486720768131767e-05, |
|
"loss": 0.0157, |
|
"step": 66950 |
|
}, |
|
{ |
|
"epoch": 0.9622703836155514, |
|
"grad_norm": 0.03426423668861389, |
|
"learning_rate": 2.0479505101476326e-05, |
|
"loss": 0.0287, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 0.9629884958421302, |
|
"grad_norm": 0.013101859949529171, |
|
"learning_rate": 2.0472289434820884e-05, |
|
"loss": 0.0195, |
|
"step": 67050 |
|
}, |
|
{ |
|
"epoch": 0.963706608068709, |
|
"grad_norm": 3.0221145153045654, |
|
"learning_rate": 2.046507376816544e-05, |
|
"loss": 0.0597, |
|
"step": 67100 |
|
}, |
|
{ |
|
"epoch": 0.9644247202952877, |
|
"grad_norm": 0.02117176167666912, |
|
"learning_rate": 2.045785810151e-05, |
|
"loss": 0.0303, |
|
"step": 67150 |
|
}, |
|
{ |
|
"epoch": 0.9651428325218665, |
|
"grad_norm": 3.6087586879730225, |
|
"learning_rate": 2.0450642434854557e-05, |
|
"loss": 0.057, |
|
"step": 67200 |
|
}, |
|
{ |
|
"epoch": 0.9658609447484453, |
|
"grad_norm": 0.014843679033219814, |
|
"learning_rate": 2.0443426768199112e-05, |
|
"loss": 0.0242, |
|
"step": 67250 |
|
}, |
|
{ |
|
"epoch": 0.9665790569750241, |
|
"grad_norm": 0.5270505547523499, |
|
"learning_rate": 2.0436211101543674e-05, |
|
"loss": 0.0342, |
|
"step": 67300 |
|
}, |
|
{ |
|
"epoch": 0.9672971692016028, |
|
"grad_norm": 0.031143778935074806, |
|
"learning_rate": 2.042899543488823e-05, |
|
"loss": 0.0415, |
|
"step": 67350 |
|
}, |
|
{ |
|
"epoch": 0.9680152814281816, |
|
"grad_norm": 30.298582077026367, |
|
"learning_rate": 2.0421779768232785e-05, |
|
"loss": 0.0355, |
|
"step": 67400 |
|
}, |
|
{ |
|
"epoch": 0.9687333936547604, |
|
"grad_norm": 0.10604274272918701, |
|
"learning_rate": 2.0414564101577347e-05, |
|
"loss": 0.0599, |
|
"step": 67450 |
|
}, |
|
{ |
|
"epoch": 0.9694515058813391, |
|
"grad_norm": 0.03374650701880455, |
|
"learning_rate": 2.0407348434921903e-05, |
|
"loss": 0.0312, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 0.9701696181079179, |
|
"grad_norm": 14.213685989379883, |
|
"learning_rate": 2.0400132768266458e-05, |
|
"loss": 0.0481, |
|
"step": 67550 |
|
}, |
|
{ |
|
"epoch": 0.9708877303344967, |
|
"grad_norm": 1.0365869998931885, |
|
"learning_rate": 2.039291710161102e-05, |
|
"loss": 0.0324, |
|
"step": 67600 |
|
}, |
|
{ |
|
"epoch": 0.9716058425610754, |
|
"grad_norm": 0.015141056850552559, |
|
"learning_rate": 2.0385701434955575e-05, |
|
"loss": 0.0085, |
|
"step": 67650 |
|
}, |
|
{ |
|
"epoch": 0.9723239547876542, |
|
"grad_norm": 0.015030864626169205, |
|
"learning_rate": 2.0378485768300134e-05, |
|
"loss": 0.0297, |
|
"step": 67700 |
|
}, |
|
{ |
|
"epoch": 0.973042067014233, |
|
"grad_norm": 0.0173374954611063, |
|
"learning_rate": 2.0371270101644693e-05, |
|
"loss": 0.0255, |
|
"step": 67750 |
|
}, |
|
{ |
|
"epoch": 0.9737601792408117, |
|
"grad_norm": 0.011118406429886818, |
|
"learning_rate": 2.0364054434989248e-05, |
|
"loss": 0.0145, |
|
"step": 67800 |
|
}, |
|
{ |
|
"epoch": 0.9744782914673905, |
|
"grad_norm": 0.0857364684343338, |
|
"learning_rate": 2.0356838768333807e-05, |
|
"loss": 0.0356, |
|
"step": 67850 |
|
}, |
|
{ |
|
"epoch": 0.9751964036939693, |
|
"grad_norm": 0.0073992363177239895, |
|
"learning_rate": 2.0349623101678366e-05, |
|
"loss": 0.026, |
|
"step": 67900 |
|
}, |
|
{ |
|
"epoch": 0.975914515920548, |
|
"grad_norm": 0.03137329965829849, |
|
"learning_rate": 2.034240743502292e-05, |
|
"loss": 0.0345, |
|
"step": 67950 |
|
}, |
|
{ |
|
"epoch": 0.9766326281471268, |
|
"grad_norm": 4.4521050453186035, |
|
"learning_rate": 2.0335191768367483e-05, |
|
"loss": 0.0393, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 0.9773507403737056, |
|
"grad_norm": 0.43478429317474365, |
|
"learning_rate": 2.032797610171204e-05, |
|
"loss": 0.0281, |
|
"step": 68050 |
|
}, |
|
{ |
|
"epoch": 0.9780688526002844, |
|
"grad_norm": 1.3469187021255493, |
|
"learning_rate": 2.0320760435056594e-05, |
|
"loss": 0.0349, |
|
"step": 68100 |
|
}, |
|
{ |
|
"epoch": 0.9787869648268631, |
|
"grad_norm": 2.2233638763427734, |
|
"learning_rate": 2.0313544768401156e-05, |
|
"loss": 0.0639, |
|
"step": 68150 |
|
}, |
|
{ |
|
"epoch": 0.9795050770534419, |
|
"grad_norm": 2.721013307571411, |
|
"learning_rate": 2.030632910174571e-05, |
|
"loss": 0.0153, |
|
"step": 68200 |
|
}, |
|
{ |
|
"epoch": 0.9802231892800207, |
|
"grad_norm": 0.013984871096909046, |
|
"learning_rate": 2.0299113435090266e-05, |
|
"loss": 0.0468, |
|
"step": 68250 |
|
}, |
|
{ |
|
"epoch": 0.9809413015065994, |
|
"grad_norm": 0.13640810549259186, |
|
"learning_rate": 2.029189776843483e-05, |
|
"loss": 0.0539, |
|
"step": 68300 |
|
}, |
|
{ |
|
"epoch": 0.9816594137331782, |
|
"grad_norm": 0.023162130266427994, |
|
"learning_rate": 2.0284682101779384e-05, |
|
"loss": 0.048, |
|
"step": 68350 |
|
}, |
|
{ |
|
"epoch": 0.982377525959757, |
|
"grad_norm": 0.007806818932294846, |
|
"learning_rate": 2.027746643512394e-05, |
|
"loss": 0.0221, |
|
"step": 68400 |
|
}, |
|
{ |
|
"epoch": 0.9830956381863357, |
|
"grad_norm": 0.1331024169921875, |
|
"learning_rate": 2.02702507684685e-05, |
|
"loss": 0.045, |
|
"step": 68450 |
|
}, |
|
{ |
|
"epoch": 0.9838137504129145, |
|
"grad_norm": 0.027483688667416573, |
|
"learning_rate": 2.0263035101813057e-05, |
|
"loss": 0.02, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 0.9845318626394933, |
|
"grad_norm": 0.017357712611556053, |
|
"learning_rate": 2.0255819435157612e-05, |
|
"loss": 0.0299, |
|
"step": 68550 |
|
}, |
|
{ |
|
"epoch": 0.985249974866072, |
|
"grad_norm": 0.0151291498914361, |
|
"learning_rate": 2.0248603768502174e-05, |
|
"loss": 0.0305, |
|
"step": 68600 |
|
}, |
|
{ |
|
"epoch": 0.9859680870926508, |
|
"grad_norm": 0.03477681055665016, |
|
"learning_rate": 2.024138810184673e-05, |
|
"loss": 0.0561, |
|
"step": 68650 |
|
}, |
|
{ |
|
"epoch": 0.9866861993192296, |
|
"grad_norm": 0.016767321154475212, |
|
"learning_rate": 2.0234172435191288e-05, |
|
"loss": 0.0401, |
|
"step": 68700 |
|
}, |
|
{ |
|
"epoch": 0.9874043115458084, |
|
"grad_norm": 0.32830455899238586, |
|
"learning_rate": 2.0226956768535847e-05, |
|
"loss": 0.0557, |
|
"step": 68750 |
|
}, |
|
{ |
|
"epoch": 0.9881224237723871, |
|
"grad_norm": 5.949401378631592, |
|
"learning_rate": 2.0219741101880402e-05, |
|
"loss": 0.0062, |
|
"step": 68800 |
|
}, |
|
{ |
|
"epoch": 0.9888405359989659, |
|
"grad_norm": 0.01927843689918518, |
|
"learning_rate": 2.021252543522496e-05, |
|
"loss": 0.0371, |
|
"step": 68850 |
|
}, |
|
{ |
|
"epoch": 0.9895586482255447, |
|
"grad_norm": 0.024355091154575348, |
|
"learning_rate": 2.020530976856952e-05, |
|
"loss": 0.0392, |
|
"step": 68900 |
|
}, |
|
{ |
|
"epoch": 0.9902767604521234, |
|
"grad_norm": 2.1073899269104004, |
|
"learning_rate": 2.0198094101914075e-05, |
|
"loss": 0.0172, |
|
"step": 68950 |
|
}, |
|
{ |
|
"epoch": 0.9909948726787022, |
|
"grad_norm": 30.42909049987793, |
|
"learning_rate": 2.0190878435258634e-05, |
|
"loss": 0.0356, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 0.991712984905281, |
|
"grad_norm": 2.0507357120513916, |
|
"learning_rate": 2.0183662768603193e-05, |
|
"loss": 0.0344, |
|
"step": 69050 |
|
}, |
|
{ |
|
"epoch": 0.9924310971318597, |
|
"grad_norm": 0.010869896970689297, |
|
"learning_rate": 2.0176447101947748e-05, |
|
"loss": 0.0201, |
|
"step": 69100 |
|
}, |
|
{ |
|
"epoch": 0.9931492093584385, |
|
"grad_norm": 170.85975646972656, |
|
"learning_rate": 2.0169231435292307e-05, |
|
"loss": 0.0148, |
|
"step": 69150 |
|
}, |
|
{ |
|
"epoch": 0.9938673215850173, |
|
"grad_norm": 34.987667083740234, |
|
"learning_rate": 2.0162015768636865e-05, |
|
"loss": 0.0465, |
|
"step": 69200 |
|
}, |
|
{ |
|
"epoch": 0.994585433811596, |
|
"grad_norm": 4.180880546569824, |
|
"learning_rate": 2.015480010198142e-05, |
|
"loss": 0.0156, |
|
"step": 69250 |
|
}, |
|
{ |
|
"epoch": 0.9953035460381748, |
|
"grad_norm": 0.004112455993890762, |
|
"learning_rate": 2.014758443532598e-05, |
|
"loss": 0.0458, |
|
"step": 69300 |
|
}, |
|
{ |
|
"epoch": 0.9960216582647536, |
|
"grad_norm": 0.01970968022942543, |
|
"learning_rate": 2.0140368768670538e-05, |
|
"loss": 0.0239, |
|
"step": 69350 |
|
}, |
|
{ |
|
"epoch": 0.9967397704913323, |
|
"grad_norm": 0.024871932342648506, |
|
"learning_rate": 2.0133153102015097e-05, |
|
"loss": 0.0472, |
|
"step": 69400 |
|
}, |
|
{ |
|
"epoch": 0.9974578827179111, |
|
"grad_norm": 79.59233093261719, |
|
"learning_rate": 2.0125937435359652e-05, |
|
"loss": 0.042, |
|
"step": 69450 |
|
}, |
|
{ |
|
"epoch": 0.9981759949444899, |
|
"grad_norm": 0.008201238699257374, |
|
"learning_rate": 2.011872176870421e-05, |
|
"loss": 0.0375, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 0.9988941071710687, |
|
"grad_norm": 5.825506210327148, |
|
"learning_rate": 2.011150610204877e-05, |
|
"loss": 0.0456, |
|
"step": 69550 |
|
}, |
|
{ |
|
"epoch": 0.9996122193976474, |
|
"grad_norm": 8.27955436706543, |
|
"learning_rate": 2.0104290435393325e-05, |
|
"loss": 0.0497, |
|
"step": 69600 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": { |
|
"accuracy": 0.9902156155185724 |
|
}, |
|
"eval_f1": { |
|
"f1": 0.9816237212470244 |
|
}, |
|
"eval_loss": 0.04228377714753151, |
|
"eval_precision": { |
|
"precision": 0.9665604249667995 |
|
}, |
|
"eval_recall": { |
|
"recall": 0.99716395621258 |
|
}, |
|
"eval_runtime": 478.7526, |
|
"eval_samples_per_second": 581.731, |
|
"eval_steps_per_second": 18.181, |
|
"step": 69627 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 208881, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.931101885589811e+17, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|